xref: /qemu/target/s390x/tcg/mem_helper.c (revision 513823e7521a09ed7ad1e32e6454bac3b2cbf52d)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/cpu-common.h"
28 #include "exec/exec-all.h"
29 #include "exec/page-protection.h"
30 #include "exec/cpu_ldst.h"
31 #include "hw/core/tcg-cpu-ops.h"
32 #include "qemu/int128.h"
33 #include "qemu/atomic128.h"
34 
35 #if defined(CONFIG_USER_ONLY)
36 #include "user/page-protection.h"
37 #else
38 #include "hw/s390x/storage-keys.h"
39 #include "hw/boards.h"
40 #endif
41 
#ifdef CONFIG_USER_ONLY
/*
 * In user-only mode every guest page has a direct host mapping, so the
 * "do we have a host address?" checks below always take the fast path.
 */
# define user_or_likely(X)    true
#else
# define user_or_likely(X)    likely(X)
#endif
47 
48 /*****************************************************************************/
49 /* Softmmu support */
50 
/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
/* Compile-time switch: helper tracing goes to the qemu log when enabled. */
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif
57 
58 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
59 {
60     uint16_t pkm = env->cregs[3] >> 16;
61 
62     if (env->psw.mask & PSW_MASK_PSTATE) {
63         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
64         return pkm & (0x8000 >> psw_key);
65     }
66     return true;
67 }
68 
/*
 * Return true if copying @len bytes from @src to @dest in ascending order
 * would overwrite source bytes before they are read (destructive overlap).
 */
static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
                                   uint64_t src, uint32_t len)
{
    if (!len || src == dest) {
        /* An empty range or a copy fully onto itself is harmless. */
        return false;
    }
    /* Take care of wrapping at the end of address space. */
    if (unlikely(wrap_address(env, src + len - 1) < src)) {
        return dest > src || dest <= wrap_address(env, src + len - 1);
    }
    return dest > src && dest <= src + len - 1;
}
81 
82 /* Trigger a SPECIFICATION exception if an address or a length is not
83    naturally aligned.  */
84 static inline void check_alignment(CPUS390XState *env, uint64_t v,
85                                    int wordsize, uintptr_t ra)
86 {
87     if (v % wordsize) {
88         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
89     }
90 }
91 
92 /* Load a value from memory according to its size.  */
93 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
94                                            int wordsize, uintptr_t ra)
95 {
96     switch (wordsize) {
97     case 1:
98         return cpu_ldub_data_ra(env, addr, ra);
99     case 2:
100         return cpu_lduw_data_ra(env, addr, ra);
101     default:
102         abort();
103     }
104 }
105 
106 /* Store a to memory according to its size.  */
107 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
108                                       uint64_t value, int wordsize,
109                                       uintptr_t ra)
110 {
111     switch (wordsize) {
112     case 1:
113         cpu_stb_data_ra(env, addr, value, ra);
114         break;
115     case 2:
116         cpu_stw_data_ra(env, addr, value, ra);
117         break;
118     default:
119         abort();
120     }
121 }
122 
/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;   /* guest address of the first fragment */
    target_ulong vaddr2;   /* guest address of the second fragment, if any */
    void *haddr1;          /* host address of fragment 1, or NULL (slow path) */
    void *haddr2;          /* host address of fragment 2, or NULL (slow path) */
    uint16_t size1;        /* bytes in the first fragment */
    uint16_t size2;        /* bytes in the second fragment (0 = one page only) */
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers.
     */
    int mmu_idx;
} S390Access;
139 
/*
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                    int size, MMUAccessType access_type,
                                    int mmu_idx, bool nonfault,
                                    void **phost, uintptr_t ra)
{
    int flags = probe_access_flags(env, addr, size, access_type, mmu_idx,
                                   nonfault, phost, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
#ifdef CONFIG_USER_ONLY
        /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
        env->__excp_addr = addr & TARGET_PAGE_MASK;
        /* A mapped-but-inaccessible page is a protection fault. */
        return (page_get_flags(addr) & PAGE_VALID
                ? PGM_PROTECTION : PGM_ADDRESSING);
#else
        /* The MMU translation recorded the exception for us. */
        return env->tlb_fill_exc;
#endif
    }

#ifndef CONFIG_USER_ONLY
    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif

    return 0;
}
178 
179 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
180                              bool nonfault, vaddr vaddr1, int size,
181                              MMUAccessType access_type,
182                              int mmu_idx, uintptr_t ra)
183 {
184     int size1, size2, exc;
185 
186     assert(size > 0 && size <= 4096);
187 
188     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
189     size2 = size - size1;
190 
191     memset(access, 0, sizeof(*access));
192     access->vaddr1 = vaddr1;
193     access->size1 = size1;
194     access->size2 = size2;
195     access->mmu_idx = mmu_idx;
196 
197     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
198                             &access->haddr1, ra);
199     if (unlikely(exc)) {
200         return exc;
201     }
202     if (unlikely(size2)) {
203         /* The access crosses page boundaries. */
204         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
205 
206         access->vaddr2 = vaddr2;
207         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
208                                 nonfault, &access->haddr2, ra);
209         if (unlikely(exc)) {
210             return exc;
211         }
212     }
213     return 0;
214 }
215 
216 static inline void access_prepare(S390Access *ret, CPUS390XState *env,
217                                   vaddr vaddr, int size,
218                                   MMUAccessType access_type, int mmu_idx,
219                                   uintptr_t ra)
220 {
221     int exc = access_prepare_nf(ret, env, false, vaddr, size,
222                                 access_type, mmu_idx, ra);
223     assert(!exc);
224 }
225 
226 /* Helper to handle memset on a single page. */
227 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
228                              uint8_t byte, uint16_t size, int mmu_idx,
229                              uintptr_t ra)
230 {
231     if (user_or_likely(haddr)) {
232         memset(haddr, byte, size);
233     } else {
234         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
235         for (int i = 0; i < size; i++) {
236             cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
237         }
238     }
239 }
240 
/*
 * Fill every byte of the prepared destination @desta with @byte,
 * handling a possible split across two pages.
 */
static void access_memset(CPUS390XState *env, S390Access *desta,
                          uint8_t byte, uintptr_t ra)
{
    /* Record the host return address while guest memory is touched. */
    set_helper_retaddr(ra);
    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
                     desta->mmu_idx, ra);
    if (unlikely(desta->size2)) {
        /* The range crosses a page boundary; fill the second fragment. */
        do_access_memset(env, desta->vaddr2, desta->haddr2, byte,
                         desta->size2, desta->mmu_idx, ra);
    }
    clear_helper_retaddr();
}
253 
254 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
255                                int offset, uintptr_t ra)
256 {
257     target_ulong vaddr = access->vaddr1;
258     void *haddr = access->haddr1;
259 
260     if (unlikely(offset >= access->size1)) {
261         offset -= access->size1;
262         vaddr = access->vaddr2;
263         haddr = access->haddr2;
264     }
265 
266     if (user_or_likely(haddr)) {
267         return ldub_p(haddr + offset);
268     } else {
269         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
270         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
271     }
272 }
273 
274 static void access_set_byte(CPUS390XState *env, S390Access *access,
275                             int offset, uint8_t byte, uintptr_t ra)
276 {
277     target_ulong vaddr = access->vaddr1;
278     void *haddr = access->haddr1;
279 
280     if (unlikely(offset >= access->size1)) {
281         offset -= access->size1;
282         vaddr = access->vaddr2;
283         haddr = access->haddr2;
284     }
285 
286     if (user_or_likely(haddr)) {
287         stb_p(haddr + offset, byte);
288     } else {
289         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
290         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
291     }
292 }
293 
/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int len = desta->size1 + desta->size2;

    assert(len == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (user_or_likely(desta->haddr1 &&
                       srca->haddr1 &&
                       (!desta->size2 || desta->haddr2) &&
                       (!srca->size2 || srca->haddr2))) {
        /* Offset difference of the two page-split points. */
        int diff = desta->size1 - srca->size1;

        if (likely(diff == 0)) {
            /* Source and destination split at the same offset. */
            memmove(desta->haddr1, srca->haddr1, srca->size1);
            if (unlikely(srca->size2)) {
                memmove(desta->haddr2, srca->haddr2, srca->size2);
            }
        } else if (diff > 0) {
            /* The source crosses its page boundary first. */
            memmove(desta->haddr1, srca->haddr1, srca->size1);
            memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
            if (likely(desta->size2)) {
                memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
            }
        } else {
            /* The destination crosses its page boundary first. */
            diff = -diff;
            memmove(desta->haddr1, srca->haddr1, desta->size1);
            memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
            if (likely(srca->size2)) {
                memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
            }
        }
    } else {
        /* Slow path: copy one byte at a time through the access helpers. */
        for (int i = 0; i < len; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);
            access_set_byte(env, desta, i, byte, ra);
        }
    }
}
338 
339 static int mmu_idx_from_as(uint8_t as)
340 {
341     switch (as) {
342     case AS_PRIMARY:
343         return MMU_PRIMARY_IDX;
344     case AS_SECONDARY:
345         return MMU_SECONDARY_IDX;
346     case AS_HOME:
347         return MMU_HOME_IDX;
348     default:
349         /* FIXME AS_ACCREG */
350         g_assert_not_reached();
351     }
352 }
353 
/*
 * NC: AND the l+1 bytes at @src into the l+1 bytes at @dest.
 * Returns the condition code: 1 if the result is non-zero, else 0.
 */
static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* NC always processes one more byte than specified - maximum is 256 */
    l++;

    /* Probe all ranges up front so exceptions precede any store. */
    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    set_helper_retaddr(ra);

    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
                          access_get_byte(env, &srca2, i, ra);

        /* Accumulate result bytes to derive the condition code. */
        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }

    clear_helper_retaddr();
    return c != 0;
}
385 
386 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
387                     uint64_t src)
388 {
389     return do_helper_nc(env, l, dest, src, GETPC());
390 }
391 
/*
 * XC: XOR the l+1 bytes at @src into the l+1 bytes at @dest.
 * Returns the condition code: 1 if the result is non-zero, else 0.
 */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* XC always processes one more byte than specified - maximum is 256 */
    l++;

    /* Probe all ranges up front so exceptions precede any store. */
    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        /* access_memset() brackets the helper retaddr itself. */
        access_memset(env, &desta, 0, ra);
        return 0;
    }

    set_helper_retaddr(ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
                          access_get_byte(env, &srca2, i, ra);

        /* Accumulate result bytes to derive the condition code. */
        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    clear_helper_retaddr();
    return c != 0;
}
428 
429 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
430                     uint64_t src)
431 {
432     return do_helper_xc(env, l, dest, src, GETPC());
433 }
434 
/*
 * OC: OR the l+1 bytes at @src into the l+1 bytes at @dest.
 * Returns the condition code: 1 if the result is non-zero, else 0.
 */
static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* OC always processes one more byte than specified - maximum is 256 */
    l++;

    /* Probe all ranges up front so exceptions precede any store. */
    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    set_helper_retaddr(ra);

    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
                          access_get_byte(env, &srca2, i, ra);

        /* Accumulate result bytes to derive the condition code. */
        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }

    clear_helper_retaddr();
    return c != 0;
}
466 
467 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
468                     uint64_t src)
469 {
470     return do_helper_oc(env, l, dest, src, GETPC());
471 }
472 
/*
 * MVC: copy l+1 bytes from @src to @dest with the architected
 * byte-at-a-time overlap semantics.
 */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /*
         * The classic "propagate one byte" idiom: copying with a one-byte
         * shift replicates the first source byte over the whole range.
         */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: honor the byte-at-a-time definition. */
        set_helper_retaddr(ra);
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
        clear_helper_retaddr();
    }

    return env->cc_op;
}
511 
512 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
513 {
514     do_helper_mvc(env, l, dest, src, GETPC());
515 }
516 
517 /* move right to left */
518 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
519 {
520     const int mmu_idx = s390x_env_mmu_index(env, false);
521     const uint64_t ra = GETPC();
522     S390Access srca, desta;
523     int32_t i;
524 
525     /* MVCRL always copies one more byte than specified - maximum is 256 */
526     l &= 0xff;
527     l++;
528 
529     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
530     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
531 
532     set_helper_retaddr(ra);
533     for (i = l - 1; i >= 0; i--) {
534         uint8_t byte = access_get_byte(env, &srca, i, ra);
535         access_set_byte(env, &desta, i, byte, ra);
536     }
537     clear_helper_retaddr();
538 }
539 
/*
 * MVCIN: move inverse.  @src designates the rightmost source byte; the
 * l+1 bytes ending there are copied to @dest in reversed order.
 */
void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVCIN always copies one more byte than specified - maximum is 256 */
    l++;

    /* Rebase src to the leftmost byte of the source field. */
    src = wrap_address(env, src - l + 1);
    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    set_helper_retaddr(ra);
    for (i = 0; i < l; i++) {
        /* Read from the end of the source, write forward at the dest. */
        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);

        access_set_byte(env, &desta, i, x, ra);
    }
    clear_helper_retaddr();
}
562 
/*
 * MVN: move numerics.  For each of the l+1 byte pairs, the low nibble
 * (numeric) comes from @src and the high nibble (zone) is kept from @dest.
 */
void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVN always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    set_helper_retaddr(ra);
    for (i = 0; i < l; i++) {
        /* Low nibble from the source, high nibble from the destination. */
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
                          (access_get_byte(env, &srca2, i, ra) & 0xf0);

        access_set_byte(env, &desta, i, x, ra);
    }
    clear_helper_retaddr();
}
587 
/*
 * MVO: move with offset.  The source digits are shifted left by one
 * nibble into the destination, preserving the destination's rightmost
 * (sign) nibble.  Processing runs from right to left.
 */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte: keep its low (sign) nibble. */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);

    set_helper_retaddr(ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* High nibble of the previous source byte becomes the low nibble. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted: pad with zero digits. */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
    clear_helper_retaddr();
}
624 
/*
 * MVZ: move zones.  For each of the l+1 byte pairs, the high nibble
 * (zone) comes from @src and the low nibble (numeric) is kept from @dest.
 */
void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVZ always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    set_helper_retaddr(ra);
    for (i = 0; i < l; i++) {
        /* High nibble from the source, low nibble from the destination. */
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
                          (access_get_byte(env, &srca2, i, ra) & 0x0f);

        access_set_byte(env, &desta, i, x, ra);
    }
    clear_helper_retaddr();
}
649 
650 /* compare unsigned byte arrays */
651 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
652                               uint64_t s2, uintptr_t ra)
653 {
654     uint32_t i;
655     uint32_t cc = 0;
656 
657     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
658                __func__, l, s1, s2);
659 
660     for (i = 0; i <= l; i++) {
661         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
662         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
663         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
664         if (x < y) {
665             cc = 1;
666             break;
667         } else if (x > y) {
668             cc = 2;
669             break;
670         }
671     }
672 
673     HELPER_LOG("\n");
674     return cc;
675 }
676 
677 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
678 {
679     return do_helper_clc(env, l, s1, s2, GETPC());
680 }
681 
/*
 * CLM: compare logical under mask.  Bytes of @r1 selected by @mask
 * (from the most significant down) are compared against successive
 * storage bytes at @addr.  Returns the condition code.
 */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    if (!mask) {
        /* Recognize access exceptions for the first byte */
        probe_read(env, addr, 1, s390x_env_mmu_index(env, false), ra);
    }

    while (mask) {
        if (mask & 8) {
            /* Topmost mask bit set: compare the current register byte. */
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            addr++;
        }
        /* Shift to the next mask bit / register byte. */
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
719 
720 static inline uint64_t get_address(CPUS390XState *env, int reg)
721 {
722     return wrap_address(env, env->regs[reg]);
723 }
724 
725 /*
726  * Store the address to the given register, zeroing out unused leftmost
727  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
728  */
729 static inline void set_address_zero(CPUS390XState *env, int reg,
730                                     uint64_t address)
731 {
732     if (env->psw.mask & PSW_MASK_64) {
733         env->regs[reg] = address;
734     } else {
735         if (!(env->psw.mask & PSW_MASK_32)) {
736             address &= 0x00ffffff;
737         } else {
738             address &= 0x7fffffff;
739         }
740         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
741     }
742 }
743 
744 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
745 {
746     if (env->psw.mask & PSW_MASK_64) {
747         /* 64-Bit mode */
748         env->regs[reg] = address;
749     } else {
750         if (!(env->psw.mask & PSW_MASK_32)) {
751             /* 24-Bit mode. According to the PoO it is implementation
752             dependent if bits 32-39 remain unchanged or are set to
753             zeros.  Choose the former so that the function can also be
754             used for TRT.  */
755             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
756         } else {
757             /* 31-Bit mode. According to the PoO it is implementation
758             dependent if bit 32 remains unchanged or is set to zero.
759             Choose the latter so that the function can also be used for
760             TRT.  */
761             address &= 0x7fffffff;
762             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
763         }
764     }
765 }
766 
767 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
768 {
769     if (!(env->psw.mask & PSW_MASK_64)) {
770         return (uint32_t)length;
771     }
772     return length;
773 }
774 
775 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
776 {
777     if (!(env->psw.mask & PSW_MASK_64)) {
778         /* 24-Bit and 31-Bit mode */
779         length &= 0x7fffffff;
780     }
781     return length;
782 }
783 
784 static inline uint64_t get_length(CPUS390XState *env, int reg)
785 {
786     return wrap_length31(env, env->regs[reg]);
787 }
788 
789 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
790 {
791     if (env->psw.mask & PSW_MASK_64) {
792         /* 64-Bit mode */
793         env->regs[reg] = length;
794     } else {
795         /* 24-Bit and 31-Bit mode */
796         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
797     }
798 }
799 
/*
 * SRST: search string (c is byte to search, r2 is string, r1 end of string).
 * Sets cc=1 and R1 on a hit, cc=2 if the end is reached first, cc=3 when
 * the CPU-determined work limit is hit (R2 advanced for resumption).
 */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 must contain all 0.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
837 
/*
 * SRSTU: search string unicode — like SRST but over 16-bit characters.
 * R0 holds the character to find; R2 the string; R1 the end address.
 */
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
877 
/*
 * CLST: unsigned string compare (c is string terminator).
 * Returns the updated (s2, s1) pair packed into an Int128; the condition
 * code is left in env->cc_op.
 */
Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only the low byte of R0 is the terminator. */
    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                return int128_make128(s2, s1);
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            return int128_make128(s2 + len, s1 + len);
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    return int128_make128(s2 + len, s1 + len);
}
912 
913 /* move page */
914 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
915 {
916     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
917     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
918     const int mmu_idx = s390x_env_mmu_index(env, false);
919     const bool f = extract64(r0, 11, 1);
920     const bool s = extract64(r0, 10, 1);
921     const bool cco = extract64(r0, 8, 1);
922     uintptr_t ra = GETPC();
923     S390Access srca, desta;
924     int exc;
925 
926     if ((f && s) || extract64(r0, 12, 4)) {
927         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
928     }
929 
930     /*
931      * We always manually handle exceptions such that we can properly store
932      * r1/r2 to the lowcore on page-translation exceptions.
933      *
934      * TODO: Access key handling
935      */
936     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
937                             MMU_DATA_LOAD, mmu_idx, ra);
938     if (exc) {
939         if (cco) {
940             return 2;
941         }
942         goto inject_exc;
943     }
944     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
945                             MMU_DATA_STORE, mmu_idx, ra);
946     if (exc) {
947         if (cco && exc != PGM_PROTECTION) {
948             return 1;
949         }
950         goto inject_exc;
951     }
952     access_memmove(env, &desta, &srca, ra);
953     return 0; /* data moved */
954 inject_exc:
955 #if !defined(CONFIG_USER_ONLY)
956     if (exc != PGM_ADDRESSING) {
957         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
958                  env->tlb_fill_tec);
959     }
960     if (exc == PGM_PAGE_TRANS) {
961         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
962                  r1 << 4 | r2);
963     }
964 #endif
965     tcg_s390_program_interrupt(env, exc, ra);
966 }
967 
/*
 * MVST: move string.  Copy bytes from the R2 address to the R1 address
 * until the terminator byte from R0 has been copied (cc 1) or the end of
 * the current page is reached (cc 3, CPU-determined amount processed).
 */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    /* Terminator byte comes from the low byte of R0.  */
    const uint8_t c = env->regs[0];
    /* Process only up to the end of whichever operand's page ends first.  */
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* Reserved bits of R0 must be zero.  */
    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);

    set_helper_retaddr(ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            /* Terminator copied: R1 points at it; R2 is left unchanged.  */
            clear_helper_retaddr();
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    /* Page boundary reached: advance both addresses for the next round.  */
    clear_helper_retaddr();
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}
1009 
1010 /* load access registers r1 to r3 from memory at a2 */
1011 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1012 {
1013     uintptr_t ra = GETPC();
1014     int i;
1015 
1016     if (a2 & 0x3) {
1017         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1018     }
1019 
1020     for (i = r1;; i = (i + 1) % 16) {
1021         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1022         a2 += 4;
1023 
1024         if (i == r3) {
1025             break;
1026         }
1027     }
1028 }
1029 
1030 /* store access registers r1 to r3 in memory at a2 */
1031 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1032 {
1033     uintptr_t ra = GETPC();
1034     int i;
1035 
1036     if (a2 & 0x3) {
1037         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1038     }
1039 
1040     for (i = r1;; i = (i + 1) % 16) {
1041         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1042         a2 += 4;
1043 
1044         if (i == r3) {
1045             break;
1046         }
1047     }
1048 }
1049 
/*
 * Common helper for MVCLE/MVCLU: perform one CPU-determined unit of a
 * "move long" operation, either copying from *src or padding with PAD.
 *
 * dest/destlen/src/srclen are updated in place so the caller can write
 * them back to the register pairs.  WORDSIZE is 1 (MVCLE, one-byte pad)
 * or 2 (MVCLU, two-byte pad).  Returns the condition code: 0/1/2 from
 * the initial length comparison, or 3 while work remains.
 */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    /* Bytes until the end of the destination page, capped by *destlen.  */
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    /* The final condition code is based on the initial length comparison.  */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* Nothing to move or pad.  */
    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        /* Two-byte pad (MVCLU): store the pad halves byte by byte.  */
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        set_helper_retaddr(ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        clear_helper_retaddr();
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}
1111 
/*
 * MVCL: move long.  Interruptible move with padding; the 24-bit lengths
 * live in the low bits of R1+1/R2+1 and the pad byte in bits 24-31 of
 * the low word of R2+1.  Registers are updated after every page-sized
 * step so the operation can be resumed after an interrupt.
 */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    /* Destructive overlap yields cc 3 and performs no move at all.  */
    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: fill the destination with the pad byte.  */
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            access_prepare(&srca, env, src, cur_len,
                           MMU_DATA_LOAD, mmu_idx, ra);
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1185 
1186 /* move long extended */
1187 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1188                        uint32_t r3)
1189 {
1190     uintptr_t ra = GETPC();
1191     uint64_t destlen = get_length(env, r1 + 1);
1192     uint64_t dest = get_address(env, r1);
1193     uint64_t srclen = get_length(env, r3 + 1);
1194     uint64_t src = get_address(env, r3);
1195     uint8_t pad = a2;
1196     uint32_t cc;
1197 
1198     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1199 
1200     set_length(env, r1 + 1, destlen);
1201     set_length(env, r3 + 1, srclen);
1202     set_address(env, r1, dest);
1203     set_address(env, r3, src);
1204 
1205     return cc;
1206 }
1207 
1208 /* move long unicode */
1209 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1210                        uint32_t r3)
1211 {
1212     uintptr_t ra = GETPC();
1213     uint64_t destlen = get_length(env, r1 + 1);
1214     uint64_t dest = get_address(env, r1);
1215     uint64_t srclen = get_length(env, r3 + 1);
1216     uint64_t src = get_address(env, r3);
1217     uint16_t pad = a2;
1218     uint32_t cc;
1219 
1220     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1221 
1222     set_length(env, r1 + 1, destlen);
1223     set_length(env, r3 + 1, srclen);
1224     set_address(env, r1, dest);
1225     set_address(env, r3, src);
1226 
1227     return cc;
1228 }
1229 
/*
 * Common helper for CLCL/CLCLE/CLCLU: compare two operands of possibly
 * different lengths; the shorter operand is logically extended with PAD.
 * src1/src1len and src3/src3len are advanced in place past the equal
 * prefix.  LIMIT caps the number of bytes examined (cc 3 when hit);
 * WORDSIZE is the element size in bytes (1 or 2).
 */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    /* Both lengths must be a multiple of the element size.  */
    check_alignment(env, *src1len | *src3len, wordsize, ra);

    /* Both operands empty: equal, cc 0.  */
    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        /* An exhausted operand is extended with the pad element.  */
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        /* Only advance operands that still have data left.  */
        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
1281 
1282 
1283 /* compare logical long */
1284 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1285 {
1286     uintptr_t ra = GETPC();
1287     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1288     uint64_t src1 = get_address(env, r1);
1289     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1290     uint64_t src3 = get_address(env, r2);
1291     uint8_t pad = env->regs[r2 + 1] >> 24;
1292     uint32_t cc;
1293 
1294     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1295 
1296     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1297     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1298     set_address(env, r1, src1);
1299     set_address(env, r2, src3);
1300 
1301     return cc;
1302 }
1303 
1304 /* compare logical long extended memcompare insn with padding */
1305 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1306                        uint32_t r3)
1307 {
1308     uintptr_t ra = GETPC();
1309     uint64_t src1len = get_length(env, r1 + 1);
1310     uint64_t src1 = get_address(env, r1);
1311     uint64_t src3len = get_length(env, r3 + 1);
1312     uint64_t src3 = get_address(env, r3);
1313     uint8_t pad = a2;
1314     uint32_t cc;
1315 
1316     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1317 
1318     set_length(env, r1 + 1, src1len);
1319     set_length(env, r3 + 1, src3len);
1320     set_address(env, r1, src1);
1321     set_address(env, r3, src3);
1322 
1323     return cc;
1324 }
1325 
1326 /* compare logical long unicode memcompare insn with padding */
1327 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1328                        uint32_t r3)
1329 {
1330     uintptr_t ra = GETPC();
1331     uint64_t src1len = get_length(env, r1 + 1);
1332     uint64_t src1 = get_address(env, r1);
1333     uint64_t src3len = get_length(env, r3 + 1);
1334     uint64_t src3 = get_address(env, r3);
1335     uint16_t pad = a2;
1336     uint32_t cc = 0;
1337 
1338     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1339 
1340     set_length(env, r1 + 1, src1len);
1341     set_length(env, r3 + 1, src3len);
1342     set_address(env, r1, src1);
1343     set_address(env, r3, src3);
1344 
1345     return cc;
1346 }
1347 
/*
 * CKSM: checksum.  Accumulate 32-bit words of the SRC operand (with a
 * zero-extended partial word at the end) into the low word of R1,
 * folding carries back into the sum.  Sets cc_op (0 when the whole
 * operand was consumed, 3 otherwise) and returns the checksum and the
 * number of bytes processed packed into an Int128.
 */
Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                    uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    /* 1-3 trailing bytes are treated as a word padded with zeros.  */
    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    return int128_make128(cksm, len);
}
1393 
/*
 * PACK: convert zoned decimal at SRC to packed decimal at DEST, right to
 * left.  LEN carries both 4-bit length codes: destination in bits 4-7,
 * source in bits 0-3.
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* Start at the rightmost byte of each operand.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        /* Low nibble of the output from one source byte...  */
        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        /* ...high nibble from the next; missing digits stay zero.  */
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1430 
/*
 * Common helper for PKA/PKU: pack the digits of an ASCII (ssize == 1) or
 * Unicode (ssize == 2) source of SRCLEN bytes into a 16-byte packed
 * decimal destination, processing right to left.  The sign nibble is
 * forced to 0xc (positive); digits beyond the source are zero.
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            /* Low nibble from the current source element.  */
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        /* High nibble from the next source element, if any remains.
           NOTE(review): elements are only consumed while srclen > ssize,
           i.e. the leftmost element is never read — confirm against the
           PKA/PKU definition in the PoP.  */
        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1464 
1465 
1466 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1467                  uint32_t srclen)
1468 {
1469     do_pkau(env, dest, src, srclen, 1, GETPC());
1470 }
1471 
1472 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1473                  uint32_t srclen)
1474 {
1475     do_pkau(env, dest, src, srclen, 2, GETPC());
1476 }
1477 
/*
 * UNPK: convert packed decimal at SRC to zoned decimal at DEST, right to
 * left.  LEN carries both 4-bit length codes: destination in bits 4-7,
 * source in bits 0-3.  Zone nibbles are filled with 0xf.
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    int second_nibble = 0;

    /* Start at the rightmost byte of each operand.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        /* Exhausted source digits read as zero.  */
        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1524 
/*
 * Common helper for UNPKA/UNPKU: unpack the 16-byte packed decimal
 * source at SRC into DESTLEN bytes at DEST, writing one-byte (dsize == 1,
 * UNPKA) or two-byte (dsize == 2, UNPKU) digit elements right to left.
 * Returns cc: 0 for a plus sign, 1 for minus, 3 for an invalid sign
 * nibble.
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Odd element: fetch the next source byte (low nibble used).  */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* Even element: use the high nibble of the previous byte.  */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1575 
1576 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1577                        uint64_t src)
1578 {
1579     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1580 }
1581 
1582 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1583                        uint64_t src)
1584 {
1585     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1586 }
1587 
1588 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1589 {
1590     uintptr_t ra = GETPC();
1591     uint32_t cc = 0;
1592     int i;
1593 
1594     for (i = 0; i < destlen; i++) {
1595         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1596         /* digit */
1597         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1598 
1599         if (i == (destlen - 1)) {
1600             /* sign */
1601             cc |= (b & 0xf) < 0xa ? 1 : 0;
1602         } else {
1603             /* digit */
1604             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1605         }
1606     }
1607 
1608     return cc;
1609 }
1610 
1611 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1612                              uint64_t trans, uintptr_t ra)
1613 {
1614     uint32_t i;
1615 
1616     for (i = 0; i <= len; i++) {
1617         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1618         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1619         cpu_stb_data_ra(env, array + i, new_byte, ra);
1620     }
1621 
1622     return env->cc_op;
1623 }
1624 
1625 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1626                 uint64_t trans)
1627 {
1628     do_helper_tr(env, len, array, trans, GETPC());
1629 }
1630 
/*
 * TRE: translate extended.  Translate bytes of ARRAY in place through the
 * table at TRANS, stopping at the test byte from the low byte of R0, at
 * the end of the operand, or at the CPU-determined limit.  Sets cc_op
 * (0 done, 1 test byte found, 3 limit reached) and returns the remaining
 * length (low half) and next array address (high half) in an Int128.
 */
Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
                   uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* Outside 64-bit mode, truncate the address and length.  */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* The test byte ends the translation before being translated.  */
        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    return int128_make128(len - i, array + i);
}
1669 
1670 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1671                                      uint64_t array, uint64_t trans,
1672                                      int inc, uintptr_t ra)
1673 {
1674     int i;
1675 
1676     for (i = 0; i <= len; i++) {
1677         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1678         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1679 
1680         if (sbyte != 0) {
1681             set_address(env, 1, array + i * inc);
1682             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1683             return (i == len) ? 2 : 1;
1684         }
1685     }
1686 
1687     return 0;
1688 }
1689 
/* Forward TRT scan with the common (env, len, array, trans, ra) helper
   signature — presumably used as an indirect target elsewhere in this
   file; TODO confirm against callers.  */
static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
                                  uint64_t array, uint64_t trans,
                                  uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, 1, ra);
}
1696 
1697 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1698                      uint64_t trans)
1699 {
1700     return do_helper_trt(env, len, array, trans, 1, GETPC());
1701 }
1702 
/* Backward TRT scan with the common (env, len, array, trans, ra) helper
   signature — presumably used as an indirect target elsewhere in this
   file; TODO confirm against callers.  */
static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
                                   uint64_t array, uint64_t trans,
                                   uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, -1, ra);
}
1709 
1710 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1711                       uint64_t trans)
1712 {
1713     return do_helper_trt(env, len, array, trans, -1, GETPC());
1714 }
1715 
/*
 * Translate one/two to one/two (TROO/TROT/TRTO/TRTT).  SIZES selects the
 * destination (bit 0) and source (bit 1) element widths; TST is the test
 * value that stops the translation with cc 1.  The table address is taken
 * from GPR 1, the destination/length from R1/R1+1 and the source from R2.
 * Returns cc 0 (done), 1 (test element found) or 3 (limit reached).
 */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The operand length must be a multiple of the source element size.  */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        /* Stop before storing when the translated value equals TST.  */
        if (dval == tst) {
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Write back the updated addresses and remaining length.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1769 
1770 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1771                         uint64_t a2, bool parallel)
1772 {
1773     uint32_t mem_idx = s390x_env_mmu_index(env, false);
1774     MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
1775     MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
1776     MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
1777     MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
1778     MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
1779     uintptr_t ra = GETPC();
1780     uint32_t fc = extract32(env->regs[0], 0, 8);
1781     uint32_t sc = extract32(env->regs[0], 8, 8);
1782     uint64_t pl = get_address(env, 1) & -16;
1783     uint64_t svh, svl;
1784     uint32_t cc;
1785 
1786     /* Sanity check the function code and storage characteristic.  */
1787     if (fc > 1 || sc > 3) {
1788         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1789             goto spec_exception;
1790         }
1791         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1792             goto spec_exception;
1793         }
1794     }
1795 
1796     /* Sanity check the alignments.  */
1797     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1798         goto spec_exception;
1799     }
1800 
1801     /* Sanity check writability of the store address.  */
1802     probe_write(env, a2, 1 << sc, mem_idx, ra);
1803 
1804     /*
1805      * Note that the compare-and-swap is atomic, and the store is atomic,
1806      * but the complete operation is not.  Therefore we do not need to
1807      * assert serial context in order to implement this.  That said,
1808      * restart early if we can't support either operation that is supposed
1809      * to be atomic.
1810      */
1811     if (parallel) {
1812         uint32_t max = 2;
1813 #ifdef CONFIG_ATOMIC64
1814         max = 3;
1815 #endif
1816         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1817             (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
1818             cpu_loop_exit_atomic(env_cpu(env), ra);
1819         }
1820     }
1821 
1822     /*
1823      * All loads happen before all stores.  For simplicity, load the entire
1824      * store value area from the parameter list.
1825      */
1826     svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
1827     svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
1828 
1829     switch (fc) {
1830     case 0:
1831         {
1832             uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
1833             uint32_t cv = env->regs[r3];
1834             uint32_t ov;
1835 
1836             if (parallel) {
1837                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
1838             } else {
1839                 ov = cpu_ldl_mmu(env, a1, oi4, ra);
1840                 cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
1841             }
1842             cc = (ov != cv);
1843             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1844         }
1845         break;
1846 
1847     case 1:
1848         {
1849             uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
1850             uint64_t cv = env->regs[r3];
1851             uint64_t ov;
1852 
1853             if (parallel) {
1854 #ifdef CONFIG_ATOMIC64
1855                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
1856 #else
1857                 /* Note that we asserted !parallel above.  */
1858                 g_assert_not_reached();
1859 #endif
1860             } else {
1861                 ov = cpu_ldq_mmu(env, a1, oi8, ra);
1862                 cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
1863             }
1864             cc = (ov != cv);
1865             env->regs[r3] = ov;
1866         }
1867         break;
1868 
1869     case 2:
1870         {
1871             Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
1872             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1873             Int128 ov;
1874 
1875             if (!parallel) {
1876                 ov = cpu_ld16_mmu(env, a1, oi16, ra);
1877                 cc = !int128_eq(ov, cv);
1878                 if (cc) {
1879                     nv = ov;
1880                 }
1881                 cpu_st16_mmu(env, a1, nv, oi16, ra);
1882             } else if (HAVE_CMPXCHG128) {
1883                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
1884                 cc = !int128_eq(ov, cv);
1885             } else {
1886                 /* Note that we asserted !parallel above.  */
1887                 g_assert_not_reached();
1888             }
1889 
1890             env->regs[r3 + 0] = int128_gethi(ov);
1891             env->regs[r3 + 1] = int128_getlo(ov);
1892         }
1893         break;
1894 
1895     default:
1896         g_assert_not_reached();
1897     }
1898 
1899     /* Store only if the comparison succeeded.  Note that above we use a pair
1900        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1901        from the most-significant bits of svh.  */
1902     if (cc == 0) {
1903         switch (sc) {
1904         case 0:
1905             cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
1906             break;
1907         case 1:
1908             cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
1909             break;
1910         case 2:
1911             cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
1912             break;
1913         case 3:
1914             cpu_stq_mmu(env, a2, svh, oi8, ra);
1915             break;
1916         case 4:
1917             cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
1918             break;
1919         default:
1920             g_assert_not_reached();
1921         }
1922     }
1923 
1924     return cc;
1925 
1926  spec_exception:
1927     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1928 }
1929 
/* COMPARE AND SWAP AND STORE, serial context: do_csst performs the
   compare-and-swap and the dependent store without host atomics.  */
uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
{
    return do_csst(env, r3, a1, a2, false);
}
1934 
/* COMPARE AND SWAP AND STORE, parallel context: do_csst uses host atomic
   operations where available, or restarts serially via
   cpu_loop_exit_atomic when it cannot honor the required atomicity.  */
uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
                               uint64_t a2)
{
    return do_csst(env, r3, a1, a2, true);
}
1940 
1941 #if !defined(CONFIG_USER_ONLY)
1942 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1943 {
1944     uintptr_t ra = GETPC();
1945     bool PERchanged = false;
1946     uint64_t src = a2;
1947     uint32_t i;
1948 
1949     if (src & 0x7) {
1950         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1951     }
1952 
1953     for (i = r1;; i = (i + 1) % 16) {
1954         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1955         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1956             PERchanged = true;
1957         }
1958         env->cregs[i] = val;
1959         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1960                    i, src, val);
1961         src += sizeof(uint64_t);
1962 
1963         if (i == r3) {
1964             break;
1965         }
1966     }
1967 
1968     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1969         s390_cpu_recompute_watchpoints(env_cpu(env));
1970     }
1971 
1972     tlb_flush(env_cpu(env));
1973 }
1974 
1975 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1976 {
1977     uintptr_t ra = GETPC();
1978     bool PERchanged = false;
1979     uint64_t src = a2;
1980     uint32_t i;
1981 
1982     if (src & 0x3) {
1983         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1984     }
1985 
1986     for (i = r1;; i = (i + 1) % 16) {
1987         uint32_t val = cpu_ldl_data_ra(env, src, ra);
1988         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1989             PERchanged = true;
1990         }
1991         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1992         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1993         src += sizeof(uint32_t);
1994 
1995         if (i == r3) {
1996             break;
1997         }
1998     }
1999 
2000     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2001         s390_cpu_recompute_watchpoints(env_cpu(env));
2002     }
2003 
2004     tlb_flush(env_cpu(env));
2005 }
2006 
2007 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2008 {
2009     uintptr_t ra = GETPC();
2010     uint64_t dest = a2;
2011     uint32_t i;
2012 
2013     if (dest & 0x7) {
2014         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2015     }
2016 
2017     for (i = r1;; i = (i + 1) % 16) {
2018         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2019         dest += sizeof(uint64_t);
2020 
2021         if (i == r3) {
2022             break;
2023         }
2024     }
2025 }
2026 
2027 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2028 {
2029     uintptr_t ra = GETPC();
2030     uint64_t dest = a2;
2031     uint32_t i;
2032 
2033     if (dest & 0x3) {
2034         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2035     }
2036 
2037     for (i = r1;; i = (i + 1) % 16) {
2038         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2039         dest += sizeof(uint32_t);
2040 
2041         if (i == r3) {
2042             break;
2043         }
2044     }
2045 }
2046 
2047 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2048 {
2049     uintptr_t ra = GETPC();
2050     int i;
2051 
2052     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2053 
2054     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2055         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2056     }
2057 
2058     return 0;
2059 }
2060 
/*
 * TEST PROTECTION: probe whether the location at a1 may be stored into
 * and/or fetched from, reporting the outcome as a condition code rather
 * than raising a protection exception:
 *   cc 0 - fetch and store permitted
 *   cc 1 - fetch permitted, store not permitted
 *   cc 2 - neither fetch nor store permitted
 *   cc 3 - translation not available
 */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    /* The write probe failed; its program interrupt code was recorded
       in env->int_pgm_code rather than delivered.  */
    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2100 
/* insert storage key extended: return the storage key of the page
   addressed (real) by r2, or 0 if the key cannot be read.  */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* The skeys device and its class are cached across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            /* NOTE(review): presumably enable_skeys returns false when
               keys were previously disabled; flush so TLB entries built
               without key checking are discarded — confirm against the
               skeys device contract.  */
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* On lookup failure, report key 0 rather than raising an exception.  */
    rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }
    return key;
}
2129 
/* set storage key extended: set the storage key of the page addressed
   (real) by r2 from the low byte of r1.  */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    /* The skeys device and its class are cached across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            /* NOTE(review): presumably enable_skeys returns false when
               keys were previously disabled; flush so TLB entries built
               without key checking are discarded — confirm against the
               skeys device contract.  */
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* The new key is in bits 1-7 of the low byte; bit 0 is ignored.  */
    key = r1 & 0xfe;
    s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2159 
/* reset reference bit extended: clear the reference bit in the storage
   key of the page addressed (real) by r2, returning the previous
   reference and change bits as a condition code.  */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    /* The skeys device and its class are cached across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            /* NOTE(review): presumably enable_skeys returns false when
               keys were previously disabled; flush so TLB entries built
               without key checking are discarded — confirm against the
               skeys device contract.  */
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* If the key cannot be read or written, report cc 0.  */
    rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }

    /* Save the old R and C bits, then clear R in the stored key.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}
2211 
/* MOVE TO SECONDARY: copy up to 256 bytes from the primary address
   space (a2) into the secondary address space (a1).  The access key in
   KEY is only validated here, not yet applied to the accesses.
   Returns cc 3 when the requested length exceeded 256, else cc 0.  */
uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
                      uint64_t key)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* DAT must be on, the secondary space must be enabled, and the PSW
       must not be in home-space or access-register mode.  */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* The access key occupies bits 4-7 of the low byte of KEY.  */
    if (!psw_key_valid(env, (key >> 4) & 0xf)) {
        s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        return cc;
    }

    access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
    access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
2246 
/* MOVE TO PRIMARY: copy up to 256 bytes from the secondary address
   space (a2) into the primary address space (a1).  The access key in
   KEY is only validated here, not yet applied to the accesses.
   Returns cc 3 when the requested length exceeded 256, else cc 0.  */
uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
                      uint64_t key)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* DAT must be on, the secondary space must be enabled, and the PSW
       must not be in home-space or access-register mode.  */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* The access key occupies bits 4-7 of the low byte of KEY.  */
    if (!psw_key_valid(env, (key >> 4) & 0xf)) {
        s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        return cc;
    }
    access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
    access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
2280 
/* INVALIDATE DAT TABLE ENTRY: optionally mark region/segment table
   entries invalid, then flush the TLB.  r1 carries the table origin and
   designation type, r2 the effective index, option bits, and the
   additional-entry count; m4 bit 0 selects a local-only flush.  */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* Bits 12-19 of r2 (PoO bits 44-51) must be zero.  */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        /* Number of entries to invalidate: the count field plus one.  */
        entries = (r2 & 0x7ff) + 1;

        /* Pick the index field of r2 matching the designated level.  */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
2330 
/* INVALIDATE PAGE TABLE ENTRY: set the invalid bit in the PTE for VADDR
   within the page table at PTO, then flush matching TLB entries.
   m4 bit 0 selects a local-only flush.  */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    if (m4 & 1) {
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
2371 
/* PURGE TLB: flush the TLB of the local CPU only.  */
void HELPER(ptlb)(CPUS390XState *env)
{
    tlb_flush(env_cpu(env));
}
2377 
/* Flush the TLBs of all CPUs, waiting for completion.  */
void HELPER(purge)(CPUS390XState *env)
{
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2383 
2384 /* load real address */
2385 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
2386 {
2387     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2388     uint64_t ret, tec;
2389     int flags, exc, cc;
2390 
2391     /* XXX incomplete - has more corner cases */
2392     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2393         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2394     }
2395 
2396     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2397     if (exc) {
2398         cc = 3;
2399         ret = (r1 & 0xFFFFFFFF00000000ULL) | exc | 0x80000000;
2400     } else {
2401         cc = 0;
2402         ret |= addr & ~TARGET_PAGE_MASK;
2403     }
2404 
2405     env->cc_op = cc;
2406     return ret;
2407 }
2408 #endif
2409 
/* Execute instruction.  This instruction executes an insn modified with
   the contents of r1.  It does not change the executed instruction in memory;
   it does not change the program counter.

   Perform this by recording the modified instruction in env->ex_value.
   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
*/
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn;
    uint8_t opc;

    /* EXECUTE targets must be at even addresses.  */
    if (addr & 1) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    insn = cpu_lduw_code(env, addr);
    opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  The insn is left-aligned in
       the 64-bit value, opcode in the most-significant byte.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* SS-format storage-to-storage insns: call the out-of-line
           helpers directly instead of retranslating.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Decode the SS operands: length byte plus two
               base+displacement addresses.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SUPERVISOR CALL: deliver the exception directly; the helper
           does not return.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
    env->ex_target = addr;
}
2491 
/* MOVE WITH OPTIONAL SPECIFICATIONS: copy up to 4096 bytes from SRC to
   DEST, with the address space and access key of each operand selected
   by the operand-access controls (OACs) in r0.  Returns cc 3 when the
   requested length exceeded 4096, else cc 0.  */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;
    dest_a = val & 0x1;

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* The K and A control bits say whether the OAC key/address-space
       fields apply; otherwise fall back to the PSW values.  */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* Problem state may not explicitly designate the home space.  */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    len = wrap_length32(env, len);
    if (len > 4096) {
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca, desta;

        access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
                       mmu_idx_from_as(src_as), ra);
        access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
                       mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
2577 
2578 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2579    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2580    value >= 0 indicates failure, and the CC value to be returned.  */
2581 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2582                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2583                                  uint32_t *ochar, uint32_t *olen);
2584 
2585 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2586    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2587    indicates failure, and the CC value to be returned.  */
2588 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2589                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2590                                  uint32_t *olen);
2591 
2592 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2593                        bool enh_check, uintptr_t ra,
2594                        uint32_t *ochar, uint32_t *olen)
2595 {
2596     uint8_t s0, s1, s2, s3;
2597     uint32_t c, l;
2598 
2599     if (ilen < 1) {
2600         return 0;
2601     }
2602     s0 = cpu_ldub_data_ra(env, addr, ra);
2603     if (s0 <= 0x7f) {
2604         /* one byte character */
2605         l = 1;
2606         c = s0;
2607     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2608         /* invalid character */
2609         return 2;
2610     } else if (s0 <= 0xdf) {
2611         /* two byte character */
2612         l = 2;
2613         if (ilen < 2) {
2614             return 0;
2615         }
2616         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2617         c = s0 & 0x1f;
2618         c = (c << 6) | (s1 & 0x3f);
2619         if (enh_check && (s1 & 0xc0) != 0x80) {
2620             return 2;
2621         }
2622     } else if (s0 <= 0xef) {
2623         /* three byte character */
2624         l = 3;
2625         if (ilen < 3) {
2626             return 0;
2627         }
2628         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2629         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2630         c = s0 & 0x0f;
2631         c = (c << 6) | (s1 & 0x3f);
2632         c = (c << 6) | (s2 & 0x3f);
2633         /* Fold the byte-by-byte range descriptions in the PoO into
2634            tests against the complete value.  It disallows encodings
2635            that could be smaller, and the UTF-16 surrogates.  */
2636         if (enh_check
2637             && ((s1 & 0xc0) != 0x80
2638                 || (s2 & 0xc0) != 0x80
2639                 || c < 0x1000
2640                 || (c >= 0xd800 && c <= 0xdfff))) {
2641             return 2;
2642         }
2643     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2644         /* four byte character */
2645         l = 4;
2646         if (ilen < 4) {
2647             return 0;
2648         }
2649         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2650         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2651         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2652         c = s0 & 0x07;
2653         c = (c << 6) | (s1 & 0x3f);
2654         c = (c << 6) | (s2 & 0x3f);
2655         c = (c << 6) | (s3 & 0x3f);
2656         /* See above.  */
2657         if (enh_check
2658             && ((s1 & 0xc0) != 0x80
2659                 || (s2 & 0xc0) != 0x80
2660                 || (s3 & 0xc0) != 0x80
2661                 || c < 0x010000
2662                 || c > 0x10ffff)) {
2663             return 2;
2664         }
2665     } else {
2666         /* invalid character */
2667         return 2;
2668     }
2669 
2670     *ochar = c;
2671     *olen = l;
2672     return -1;
2673 }
2674 
/*
 * Decode one UTF-16 character at ADDR with ILEN source bytes available.
 * On success return -1, storing the UTF-32 code point in *OCHAR and the
 * number of bytes consumed in *OLEN.  Otherwise return the CC value:
 * 0 when the source is exhausted mid-character, 2 when ENH_CHECK is set
 * and the low surrogate is malformed.
 */
static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint16_t s0, s1;
    uint32_t c, l;

    if (ilen < 2) {
        return 0;
    }
    s0 = cpu_lduw_data_ra(env, addr, ra);
    if ((s0 & 0xfc00) != 0xd800) {
        /* one word character */
        l = 2;
        c = s0;
    } else {
        /* two word character */
        l = 4;
        if (ilen < 4) {
            return 0;
        }
        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
        /* Reassemble the code point from the surrogate pair; the +1 on
           the 4-bit field folds in the 0x10000 offset of the
           supplementary planes.  */
        c = extract32(s0, 6, 4) + 1;
        c = (c << 6) | (s0 & 0x3f);
        c = (c << 10) | (s1 & 0x3ff);
        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
            /* invalid surrogate character */
            return 2;
        }
    }

    *ochar = c;
    *olen = l;
    return -1;
}
2710 
2711 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2712                         bool enh_check, uintptr_t ra,
2713                         uint32_t *ochar, uint32_t *olen)
2714 {
2715     uint32_t c;
2716 
2717     if (ilen < 4) {
2718         return 0;
2719     }
2720     c = cpu_ldl_data_ra(env, addr, ra);
2721     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2722         /* invalid unicode character */
2723         return 2;
2724     }
2725 
2726     *ochar = c;
2727     *olen = 4;
2728     return -1;
2729 }
2730 
2731 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2732                        uintptr_t ra, uint32_t c, uint32_t *olen)
2733 {
2734     uint8_t d[4];
2735     uint32_t l, i;
2736 
2737     if (c <= 0x7f) {
2738         /* one byte character */
2739         l = 1;
2740         d[0] = c;
2741     } else if (c <= 0x7ff) {
2742         /* two byte character */
2743         l = 2;
2744         d[1] = 0x80 | extract32(c, 0, 6);
2745         d[0] = 0xc0 | extract32(c, 6, 5);
2746     } else if (c <= 0xffff) {
2747         /* three byte character */
2748         l = 3;
2749         d[2] = 0x80 | extract32(c, 0, 6);
2750         d[1] = 0x80 | extract32(c, 6, 6);
2751         d[0] = 0xe0 | extract32(c, 12, 4);
2752     } else {
2753         /* four byte character */
2754         l = 4;
2755         d[3] = 0x80 | extract32(c, 0, 6);
2756         d[2] = 0x80 | extract32(c, 6, 6);
2757         d[1] = 0x80 | extract32(c, 12, 6);
2758         d[0] = 0xf0 | extract32(c, 18, 3);
2759     }
2760 
2761     if (ilen < l) {
2762         return 1;
2763     }
2764     for (i = 0; i < l; ++i) {
2765         cpu_stb_data_ra(env, addr + i, d[i], ra);
2766     }
2767 
2768     *olen = l;
2769     return -1;
2770 }
2771 
/*
 * Encode the code point C as UTF-16 at ADDR, with ILEN destination
 * bytes available.  On success return -1 and store the byte count in
 * *OLEN; return 1 (the CC value) when the destination is exhausted.
 */
static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        uintptr_t ra, uint32_t c, uint32_t *olen)
{
    uint16_t d0, d1;

    if (c <= 0xffff) {
        /* one word character */
        if (ilen < 2) {
            return 1;
        }
        cpu_stw_data_ra(env, addr, c, ra);
        *olen = 2;
    } else {
        /* two word character */
        if (ilen < 4) {
            return 1;
        }
        /* Build the surrogate pair; the -1 in the deposit removes the
           0x10000 offset of the supplementary planes (the inverse of
           the +1 applied when decoding).  */
        d1 = 0xdc00 | extract32(c, 0, 10);
        d0 = 0xd800 | extract32(c, 10, 6);
        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
        cpu_stw_data_ra(env, addr + 0, d0, ra);
        cpu_stw_data_ra(env, addr + 2, d1, ra);
        *olen = 4;
    }

    return -1;
}
2799 
2800 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2801                         uintptr_t ra, uint32_t c, uint32_t *olen)
2802 {
2803     if (ilen < 4) {
2804         return 1;
2805     }
2806     cpu_stl_data_ra(env, addr, c, ra);
2807     *olen = 4;
2808     return -1;
2809 }
2810 
/*
 * Common CUxy loop: convert characters from the source encoding
 * (DECODE) to the destination encoding (ENCODE).  Registers r1/r1+1
 * hold the destination address/length and r2/r2+1 the source
 * address/length; both pairs are advanced past the processed data
 * before returning.  m3 bit 0 requests the enhanced well-formedness
 * check.  Returns the condition code (3 when interrupted after the
 * per-call character budget).
 */
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;
    int cc, i;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 256.  */
    for (i = 0; i < 256; ++i) {
        uint32_t c, ilen, olen;

        /* Both helpers return -1 to continue, or the final cc.  */
        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
        if (unlikely(cc >= 0)) {
            break;
        }
        cc = encode(env, dst, dlen, ra, c, &olen);
        if (unlikely(cc >= 0)) {
            break;
        }

        /* Advance past the consumed input and produced output.  */
        src += ilen;
        slen -= ilen;
        dst += olen;
        dlen -= olen;
        cc = 3;
    }

    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}
2851 
/* CONVERT UTF-8 TO UTF-16 (CU12); bit 0 of m3 enables enhanced checking. */
uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf16);
}
2857 
/* CONVERT UTF-8 TO UTF-32 (CU14); bit 0 of m3 enables enhanced checking. */
uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf32);
}
2863 
/* CONVERT UTF-16 TO UTF-8 (CU21); bit 0 of m3 enables enhanced checking. */
uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf8);
}
2869 
/* CONVERT UTF-16 TO UTF-32 (CU24); bit 0 of m3 enables enhanced checking. */
uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf32);
}
2875 
/* CONVERT UTF-32 TO UTF-8 (CU41); bit 0 of m3 enables enhanced checking. */
uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf8);
}
2881 
/* CONVERT UTF-32 TO UTF-16 (CU42); bit 0 of m3 enables enhanced checking. */
uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf16);
}
2887 
2888 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2889                         uintptr_t ra)
2890 {
2891     const int mmu_idx = s390x_env_mmu_index(env, false);
2892 
2893     /* test the actual access, not just any access to the page due to LAP */
2894     while (len) {
2895         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2896         const uint64_t curlen = MIN(pagelen, len);
2897 
2898         probe_write(env, addr, curlen, mmu_idx, ra);
2899         addr = wrap_address(env, addr + curlen);
2900         len -= curlen;
2901     }
2902 }
2903 
/* TCG helper entry point: probe writability of [addr, addr + len).  */
void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
{
    probe_write_access(env, addr, len, GETPC());
}
2908