xref: /qemu/target/mips/tcg/lmmi_helper.c (revision 82ecffa8c050bf5bbc13329e9b65eac1caa5b55c)
1 /*
2  *  Loongson Multimedia Instruction emulation helpers for QEMU.
3  *
4  *  Copyright (c) 2011  Richard Henderson <rth@twiddle.net>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/helper-proto.h"
23 
/*
 * A 64-bit multimedia register viewed as lanes of various widths.
 *
 * Which array index maps to which bits of 'd' depends on host byte order.
 * The union is therefore only safe to use directly for operations that
 * treat every lane identically; lane-position-sensitive helpers must
 * compensate for host endianness themselves.
 */
typedef union {
    uint64_t d;         /* the whole register */
    uint32_t uw[2];     /* two unsigned words */
    int32_t  sw[2];     /* two signed words */
    uint16_t uh[4];     /* four unsigned halfwords */
    int16_t  sh[4];     /* four signed halfwords */
    uint8_t  ub[8];     /* eight unsigned bytes */
    int8_t   sb[8];     /* eight signed bytes */
} LMIValue;
36 
/*
 * Lane-index correction for host byte order.  XORing a lane index with
 * BYTE_ORDER_XOR(lanes - 1) yields the array index of that lane counted
 * from the least significant end of the register: the index is mirrored
 * on big-endian hosts and left untouched on little-endian hosts.
 * The argument is parenthesized so compound expressions expand safely.
 */
#ifdef HOST_WORDS_BIGENDIAN
# define BYTE_ORDER_XOR(N) (N)
#else
# define BYTE_ORDER_XOR(N) 0
#endif
43 
/*
 * Saturation helpers.
 *
 * SATSx clamps a signed value to the signed range of a byte (B),
 * halfword (H) or word (W).  SATUx clamps only at the unsigned maximum;
 * it performs no lower-bound check, so negative inputs pass through.
 *
 * Every use of the argument is parenthesized so that expressions such as
 * SATUB(a & b) are clamped as a whole.  The argument is still evaluated
 * more than once: pass side-effect-free expressions only.
 */
#define SATSB(x)  ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
#define SATUB(x)  ((x) > 0xff ? 0xff : (x))

#define SATSH(x)  ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
#define SATUH(x)  ((x) > 0xffff ? 0xffff : (x))

#define SATSW(x) \
    ((x) < -0x80000000ll ? -0x80000000ll : (x) > 0x7fffffff ? 0x7fffffff : (x))
#define SATUW(x)  ((x) > 0xffffffffull ? 0xffffffffull : (x))
53 
/*
 * Add the eight signed byte lanes of fs and ft, clamping each sum to the
 * int8_t range.  Returns the packed lanes.
 */
uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int sum = (int8_t)(fs >> shift) + (int8_t)(ft >> shift);

        if (sum > INT8_MAX) {
            sum = INT8_MAX;
        } else if (sum < INT8_MIN) {
            sum = INT8_MIN;
        }
        result |= (uint64_t)(uint8_t)sum << shift;
    }
    return result;
}
67 
/*
 * Add the eight unsigned byte lanes of fs and ft, clamping each sum
 * at 0xff.  Returns the packed lanes.
 */
uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        unsigned int sum = (uint8_t)(fs >> shift) + (uint8_t)(ft >> shift);

        if (sum > UINT8_MAX) {
            sum = UINT8_MAX;
        }
        result |= (uint64_t)sum << shift;
    }
    return result;
}
81 
/*
 * Add the four signed halfword lanes of fs and ft, clamping each sum to
 * the int16_t range.  Returns the packed lanes.
 */
uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int sum = (int16_t)(fs >> shift) + (int16_t)(ft >> shift);

        if (sum > INT16_MAX) {
            sum = INT16_MAX;
        } else if (sum < INT16_MIN) {
            sum = INT16_MIN;
        }
        result |= (uint64_t)(uint16_t)sum << shift;
    }
    return result;
}
95 
/*
 * Add the four unsigned halfword lanes of fs and ft, clamping each sum
 * at 0xffff.  Returns the packed lanes.
 */
uint64_t helper_paddush(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        unsigned int sum = (uint16_t)(fs >> shift) + (uint16_t)(ft >> shift);

        if (sum > UINT16_MAX) {
            sum = UINT16_MAX;
        }
        result |= (uint64_t)sum << shift;
    }
    return result;
}
109 
/*
 * Add the eight byte lanes of fs and ft with wrap-around (modulo 256).
 * Returns the packed lanes.
 */
uint64_t helper_paddb(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        /* Only the low byte of the sum is kept, so carries are dropped. */
        uint64_t lane = ((fs >> shift) + (ft >> shift)) & 0xff;

        result |= lane << shift;
    }
    return result;
}
122 
/*
 * Add the four halfword lanes of fs and ft with wrap-around
 * (modulo 65536).  Returns the packed lanes.
 */
uint64_t helper_paddh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        /* Only the low halfword of the sum is kept. */
        uint64_t lane = ((fs >> shift) + (ft >> shift)) & 0xffff;

        result |= lane << shift;
    }
    return result;
}
135 
/*
 * Add the two word lanes of fs and ft with wrap-around (modulo 2^32).
 * Returns the packed lanes.
 */
uint64_t helper_paddw(uint64_t fs, uint64_t ft)
{
    uint32_t lo = (uint32_t)fs + (uint32_t)ft;
    uint32_t hi = (uint32_t)(fs >> 32) + (uint32_t)(ft >> 32);

    return ((uint64_t)hi << 32) | lo;
}
148 
/*
 * Subtract the signed byte lanes of ft from those of fs, clamping each
 * difference to the int8_t range.  Returns the packed lanes.
 */
uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int diff = (int8_t)(fs >> shift) - (int8_t)(ft >> shift);

        if (diff > INT8_MAX) {
            diff = INT8_MAX;
        } else if (diff < INT8_MIN) {
            diff = INT8_MIN;
        }
        result |= (uint64_t)(uint8_t)diff << shift;
    }
    return result;
}
162 
/*
 * Subtract the unsigned byte lanes of ft from those of fs with unsigned
 * saturation.  Returns the packed lanes.
 *
 * A difference of two bytes can never exceed 0xff, so the only bound
 * that matters is the lower one: a negative difference saturates to 0
 * instead of wrapping around.
 */
uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int diff = (uint8_t)(fs >> shift) - (uint8_t)(ft >> shift);

        if (diff < 0) {
            diff = 0;
        }
        result |= (uint64_t)diff << shift;
    }
    return result;
}
176 
/*
 * Subtract the signed halfword lanes of ft from those of fs, clamping
 * each difference to the int16_t range.  Returns the packed lanes.
 */
uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int diff = (int16_t)(fs >> shift) - (int16_t)(ft >> shift);

        if (diff > INT16_MAX) {
            diff = INT16_MAX;
        } else if (diff < INT16_MIN) {
            diff = INT16_MIN;
        }
        result |= (uint64_t)(uint16_t)diff << shift;
    }
    return result;
}
190 
/*
 * Subtract the unsigned halfword lanes of ft from those of fs with
 * unsigned saturation.  Returns the packed lanes.
 *
 * A difference of two halfwords can never exceed 0xffff, so the only
 * bound that matters is the lower one: a negative difference saturates
 * to 0 instead of wrapping around.
 */
uint64_t helper_psubush(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int diff = (uint16_t)(fs >> shift) - (uint16_t)(ft >> shift);

        if (diff < 0) {
            diff = 0;
        }
        result |= (uint64_t)diff << shift;
    }
    return result;
}
204 
/*
 * Subtract the byte lanes of ft from those of fs with wrap-around
 * (modulo 256).  Returns the packed lanes.
 */
uint64_t helper_psubb(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        /* Only the low byte of the difference is kept. */
        uint64_t lane = ((fs >> shift) - (ft >> shift)) & 0xff;

        result |= lane << shift;
    }
    return result;
}
217 
/*
 * Subtract the halfword lanes of ft from those of fs with wrap-around
 * (modulo 65536).  Returns the packed lanes.
 */
uint64_t helper_psubh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        /* Only the low halfword of the difference is kept. */
        uint64_t lane = ((fs >> shift) - (ft >> shift)) & 0xffff;

        result |= lane << shift;
    }
    return result;
}
230 
/*
 * Subtract the word lanes of ft from those of fs with wrap-around
 * (modulo 2^32).  Returns the packed lanes.
 */
uint64_t helper_psubw(uint64_t fs, uint64_t ft)
{
    uint32_t lo = (uint32_t)fs - (uint32_t)ft;
    uint32_t hi = (uint32_t)(fs >> 32) - (uint32_t)(ft >> 32);

    return ((uint64_t)hi << 32) | lo;
}
243 
/*
 * Shuffle the four halfword lanes of fs.  Result lane i (counted from
 * the least significant end) is the fs lane selected by bits
 * [2i+1 : 2i] of ft; higher bits of ft are ignored.
 */
uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int lane;

    for (lane = 0; lane < 4; lane++) {
        unsigned int src = (ft >> (2 * lane)) & 3;
        uint64_t half = (fs >> (16 * src)) & 0xffff;

        result |= half << (16 * lane);
    }
    return result;
}
257 
/*
 * Pack four signed words into four signed halfwords with signed
 * saturation.  From least to most significant, the result lanes come
 * from: low word of fs, high word of fs, low word of ft, high word of ft.
 *
 * Each clamped value is widened through uint16_t/uint64_t before being
 * shifted into place, so no signed value is ever left-shifted into the
 * sign bit.
 */
uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
{
    const int32_t words[4] = {
        (int32_t)fs, (int32_t)(fs >> 32),
        (int32_t)ft, (int32_t)(ft >> 32),
    };
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; ++i) {
        int32_t w = words[i];

        if (w > INT16_MAX) {
            w = INT16_MAX;
        } else if (w < INT16_MIN) {
            w = INT16_MIN;
        }
        fd |= (uint64_t)(uint16_t)w << (i * 16);
    }
    return fd;
}
281 
/*
 * Pack eight signed halfwords into eight signed bytes with signed
 * saturation.  The four halfwords of fs fill result bytes 0-3 and the
 * four halfwords of ft fill result bytes 4-7 (byte 0 least significant).
 */
uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 8; ++lane) {
        uint64_t src = lane < 4 ? fs : ft;
        int16_t half = src >> ((lane & 3) * 16);
        int clamped = half;

        if (clamped > INT8_MAX) {
            clamped = INT8_MAX;
        } else if (clamped < INT8_MIN) {
            clamped = INT8_MIN;
        }
        fd |= (uint64_t)(uint8_t)clamped << (lane * 8);
    }
    return fd;
}
300 
/*
 * Pack eight halfwords into eight bytes, clamping values above 0xff to
 * 0xff.  The four halfwords of fs fill result bytes 0-3 and the four
 * halfwords of ft fill result bytes 4-7 (byte 0 least significant).
 *
 * NOTE(review): each halfword is read as a signed 16-bit value and only
 * the upper bound is checked, so a negative halfword is not clamped; its
 * low byte is stored unchanged.  Confirm against the instruction
 * specification whether negative inputs should saturate to 0.
 */
uint64_t helper_packushb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 8; ++lane) {
        uint64_t src = lane < 4 ? fs : ft;
        int16_t half = src >> ((lane & 3) * 16);
        uint8_t byte;

        if (half > 0xff) {
            byte = 0xff;
        } else {
            byte = (uint8_t)half;
        }
        fd |= (uint64_t)byte << (lane * 8);
    }
    return fd;
}
319 
/*
 * Interleave the low words: the result holds the low word of fs in its
 * low half and the low word of ft in its high half.
 */
uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
{
    uint32_t lo = (uint32_t)fs;
    uint32_t hi = (uint32_t)ft;

    return ((uint64_t)hi << 32) | lo;
}
324 
/*
 * Interleave the high words: the result holds the high word of fs in
 * its low half and the high word of ft in its high half.
 */
uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
{
    uint32_t lo = (uint32_t)(fs >> 32);
    uint32_t hi = (uint32_t)(ft >> 32);

    return ((uint64_t)hi << 32) | lo;
}
329 
/*
 * Interleave the two low halfwords of fs and ft.  From least to most
 * significant the result lanes are: fs[0], ft[0], fs[1], ft[1].
 */
uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int i;

    for (i = 0; i < 2; i++) {
        uint64_t s = (fs >> (16 * i)) & 0xffff;
        uint64_t t = (ft >> (16 * i)) & 0xffff;

        result |= s << (32 * i);
        result |= t << (32 * i + 16);
    }
    return result;
}
344 
/*
 * Interleave the two high halfwords of fs and ft.  From least to most
 * significant the result lanes are: fs[2], ft[2], fs[3], ft[3].
 */
uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int i;

    for (i = 0; i < 2; i++) {
        uint64_t s = (fs >> (16 * (i + 2))) & 0xffff;
        uint64_t t = (ft >> (16 * (i + 2))) & 0xffff;

        result |= s << (32 * i);
        result |= t << (32 * i + 16);
    }
    return result;
}
359 
/*
 * Interleave the four low bytes of fs and ft.  From least to most
 * significant the result bytes are:
 * fs[0], ft[0], fs[1], ft[1], fs[2], ft[2], fs[3], ft[3].
 */
uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int i;

    for (i = 0; i < 4; i++) {
        uint64_t s = (fs >> (8 * i)) & 0xff;
        uint64_t t = (ft >> (8 * i)) & 0xff;

        result |= s << (16 * i);
        result |= t << (16 * i + 8);
    }
    return result;
}
378 
/*
 * Interleave the four high bytes of fs and ft.  From least to most
 * significant the result bytes are:
 * fs[4], ft[4], fs[5], ft[5], fs[6], ft[6], fs[7], ft[7].
 */
uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int i;

    for (i = 0; i < 4; i++) {
        uint64_t s = (fs >> (8 * (i + 4))) & 0xff;
        uint64_t t = (ft >> (8 * (i + 4))) & 0xff;

        result |= s << (16 * i);
        result |= t << (16 * i + 8);
    }
    return result;
}
397 
/*
 * Rounded average of the four unsigned halfword lanes of fs and ft:
 * each result lane is (a + b + 1) >> 1.
 */
uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        unsigned int a = (fs >> shift) & 0xffff;
        unsigned int b = (ft >> shift) & 0xffff;
        unsigned int avg = (a + b + 1) >> 1;

        result |= (uint64_t)avg << shift;
    }
    return result;
}
410 
/*
 * Rounded average of the eight unsigned byte lanes of fs and ft:
 * each result lane is (a + b + 1) >> 1.
 */
uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        unsigned int a = (fs >> shift) & 0xff;
        unsigned int b = (ft >> shift) & 0xff;
        unsigned int avg = (a + b + 1) >> 1;

        result |= (uint64_t)avg << shift;
    }
    return result;
}
423 
/*
 * Lane-wise maximum of the four signed halfwords of fs and ft.
 */
uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int16_t a = (int16_t)(fs >> shift);
        int16_t b = (int16_t)(ft >> shift);
        int16_t pick = b;

        if (a >= b) {
            pick = a;
        }
        result |= (uint64_t)(uint16_t)pick << shift;
    }
    return result;
}
436 
/*
 * Lane-wise minimum of the four signed halfwords of fs and ft.
 */
uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int16_t a = (int16_t)(fs >> shift);
        int16_t b = (int16_t)(ft >> shift);
        int16_t pick = b;

        if (a <= b) {
            pick = a;
        }
        result |= (uint64_t)(uint16_t)pick << shift;
    }
    return result;
}
449 
/*
 * Lane-wise maximum of the eight unsigned bytes of fs and ft.
 *
 * All eight byte lanes are processed; stopping after four would leave
 * half of the register holding the unmodified bytes of fs.
 */
uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint8_t a = (uint8_t)(fs >> shift);
        uint8_t b = (uint8_t)(ft >> shift);
        uint8_t pick = a >= b ? a : b;

        result |= (uint64_t)pick << shift;
    }
    return result;
}
462 
/*
 * Lane-wise minimum of the eight unsigned bytes of fs and ft.
 *
 * All eight byte lanes are processed; stopping after four would leave
 * half of the register holding the unmodified bytes of fs.
 */
uint64_t helper_pminub(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint8_t a = (uint8_t)(fs >> shift);
        uint8_t b = (uint8_t)(ft >> shift);
        uint8_t pick = a <= b ? a : b;

        result |= (uint64_t)pick << shift;
    }
    return result;
}
475 
/*
 * Compare the two word lanes of fs and ft for equality.  A lane is set
 * to all ones when the words match and to zero otherwise.
 */
uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
{
    uint64_t mask = 0;

    if ((uint32_t)fs == (uint32_t)ft) {
        mask |= 0x00000000ffffffffull;
    }
    if ((uint32_t)(fs >> 32) == (uint32_t)(ft >> 32)) {
        mask |= 0xffffffff00000000ull;
    }
    return mask;
}
488 
/*
 * Compare the two word lanes of fs and ft.  A lane is set to all ones
 * when the fs word is greater than the ft word and to zero otherwise.
 *
 * NOTE(review): the lanes are compared as unsigned 32-bit values;
 * confirm against the instruction specification whether a signed
 * comparison is intended.
 */
uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
{
    uint64_t mask = 0;

    if ((uint32_t)fs > (uint32_t)ft) {
        mask |= 0x00000000ffffffffull;
    }
    if ((uint32_t)(fs >> 32) > (uint32_t)(ft >> 32)) {
        mask |= 0xffffffff00000000ull;
    }
    return mask;
}
501 
/*
 * Compare the four halfword lanes of fs and ft for equality.  A lane is
 * set to all ones when the halfwords match and to zero otherwise.
 */
uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
{
    uint64_t mask = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint16_t a = (uint16_t)(fs >> shift);
        uint16_t b = (uint16_t)(ft >> shift);

        if (a == b) {
            mask |= (uint64_t)0xffff << shift;
        }
    }
    return mask;
}
514 
/*
 * Compare the four halfword lanes of fs and ft.  A lane is set to all
 * ones when the fs halfword is greater than the ft halfword and to zero
 * otherwise.
 *
 * NOTE(review): the lanes are compared as unsigned 16-bit values;
 * confirm against the instruction specification whether a signed
 * comparison is intended.
 */
uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
{
    uint64_t mask = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint16_t a = (uint16_t)(fs >> shift);
        uint16_t b = (uint16_t)(ft >> shift);

        if (a > b) {
            mask |= (uint64_t)0xffff << shift;
        }
    }
    return mask;
}
527 
/*
 * Compare the eight byte lanes of fs and ft for equality.  A lane is
 * set to 0xff when the bytes match and to zero otherwise.
 */
uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
{
    uint64_t mask = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint8_t a = (uint8_t)(fs >> shift);
        uint8_t b = (uint8_t)(ft >> shift);

        if (a == b) {
            mask |= (uint64_t)0xff << shift;
        }
    }
    return mask;
}
540 
/*
 * Compare the eight byte lanes of fs and ft.  A lane is set to 0xff
 * when the fs byte is greater than the ft byte and to zero otherwise.
 *
 * NOTE(review): the lanes are compared as unsigned 8-bit values;
 * confirm against the instruction specification whether a signed
 * comparison is intended.
 */
uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
{
    uint64_t mask = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint8_t a = (uint8_t)(fs >> shift);
        uint8_t b = (uint8_t)(ft >> shift);

        if (a > b) {
            mask |= (uint64_t)0xff << shift;
        }
    }
    return mask;
}
553 
/*
 * Shift both word lanes of fs left by the count in the low seven bits
 * of ft.  Counts of 32 or more produce zero.
 */
uint64_t helper_psllw(uint64_t fs, uint64_t ft)
{
    unsigned int count = ft & 0x7f;
    uint32_t lo, hi;

    if (count >= 32) {
        return 0;
    }
    lo = (uint32_t)fs << count;
    hi = (uint32_t)(fs >> 32) << count;
    return ((uint64_t)hi << 32) | lo;
}
569 
/*
 * Logical right shift of both word lanes of fs by the count in the low
 * seven bits of ft.  Counts of 32 or more produce zero.
 */
uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
{
    unsigned int count = ft & 0x7f;
    uint32_t lo, hi;

    if (count >= 32) {
        return 0;
    }
    lo = (uint32_t)fs >> count;
    hi = (uint32_t)(fs >> 32) >> count;
    return ((uint64_t)hi << 32) | lo;
}
585 
/*
 * Arithmetic right shift of both signed word lanes of fs by the count
 * in the low seven bits of ft.  Counts above 31 are treated as 31, so
 * each lane collapses to its sign.
 */
uint64_t helper_psraw(uint64_t fs, uint64_t ft)
{
    unsigned int count = ft & 0x7f;
    int32_t lo, hi;

    if (count > 31) {
        count = 31;
    }
    /* Relies on >> of a negative value being an arithmetic shift. */
    lo = (int32_t)fs >> count;
    hi = (int32_t)(fs >> 32) >> count;
    return ((uint64_t)(uint32_t)hi << 32) | (uint32_t)lo;
}
601 
/*
 * Shift the four halfword lanes of fs left by the count in the low
 * seven bits of ft.  Counts of 16 or more produce zero.
 */
uint64_t helper_psllh(uint64_t fs, uint64_t ft)
{
    unsigned int count = ft & 0x7f;
    uint64_t result = 0;
    unsigned int shift;

    if (count >= 16) {
        return 0;
    }
    for (shift = 0; shift < 64; shift += 16) {
        uint32_t lane = (uint32_t)((fs >> shift) & 0xffff) << count;

        /* Bits shifted out of the halfword are discarded. */
        result |= (uint64_t)(lane & 0xffff) << shift;
    }
    return result;
}
617 
/*
 * Logical right shift of the four halfword lanes of fs by the count in
 * the low seven bits of ft.  Counts of 16 or more produce zero.
 */
uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
{
    unsigned int count = ft & 0x7f;
    uint64_t result = 0;
    unsigned int shift;

    if (count >= 16) {
        return 0;
    }
    for (shift = 0; shift < 64; shift += 16) {
        uint64_t lane = ((fs >> shift) & 0xffff) >> count;

        result |= lane << shift;
    }
    return result;
}
633 
/*
 * Arithmetic right shift of the four signed halfword lanes of fs by the
 * count in the low seven bits of ft.  Counts above 15 are treated as
 * 15, so each lane collapses to its sign.
 */
uint64_t helper_psrah(uint64_t fs, uint64_t ft)
{
    unsigned int count = ft & 0x7f;
    uint64_t result = 0;
    unsigned int shift;

    if (count > 15) {
        count = 15;
    }
    for (shift = 0; shift < 64; shift += 16) {
        /* Relies on >> of a negative value being an arithmetic shift. */
        int16_t lane = (int16_t)(fs >> shift) >> count;

        result |= (uint64_t)(uint16_t)lane << shift;
    }
    return result;
}
649 
/*
 * Multiply the four signed halfword lanes of fs and ft, keeping the low
 * 16 bits of each product.
 */
uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        /* |product| is at most 2^30, so it always fits in int32_t. */
        int32_t product = (int16_t)(fs >> shift) * (int16_t)(ft >> shift);

        result |= (uint64_t)(uint16_t)product << shift;
    }
    return result;
}
662 
/*
 * Multiply the four signed halfword lanes of fs and ft, keeping the
 * high 16 bits of each 32-bit signed product.
 */
uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        /* |product| is at most 2^30, so it always fits in int32_t. */
        int32_t product = (int16_t)(fs >> shift) * (int16_t)(ft >> shift);
        /* Relies on >> of a negative value being an arithmetic shift. */
        int16_t high = product >> 16;

        result |= (uint64_t)(uint16_t)high << shift;
    }
    return result;
}
676 
/*
 * Multiply the four unsigned halfword lanes of fs and ft, keeping the
 * high 16 bits of each 32-bit unsigned product.
 *
 * The operands are widened to uint32_t before multiplying.  Multiplying
 * two uint16_t values directly would promote them to signed int, and
 * 0xffff * 0xffff does not fit in a 32-bit int (signed overflow is
 * undefined behaviour).
 */
uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint32_t a = (fs >> shift) & 0xffff;
        uint32_t b = (ft >> shift) & 0xffff;
        uint32_t product = a * b;

        result |= (uint64_t)(product >> 16) << shift;
    }
    return result;
}
690 
/*
 * Multiply-add of signed halfwords.  The low result word is
 * fs[0]*ft[0] + fs[1]*ft[1] and the high result word is
 * fs[2]*ft[2] + fs[3]*ft[3], with lanes counted from the least
 * significant end.  Each sum wraps modulo 2^32.
 */
uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
{
    uint32_t sums[2] = { 0, 0 };
    unsigned int lane;

    for (lane = 0; lane < 4; lane++) {
        int16_t a = (int16_t)(fs >> (16 * lane));
        int16_t b = (int16_t)(ft >> (16 * lane));

        /* Each product fits in int; the accumulation is unsigned. */
        sums[lane / 2] += (uint32_t)(a * b);
    }
    return ((uint64_t)sums[1] << 32) | sums[0];
}
706 
/*
 * Absolute difference of the eight unsigned byte lanes of fs and ft:
 * each result lane is |a - b|.
 */
uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
{
    uint64_t result = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int diff = (uint8_t)(fs >> shift) - (uint8_t)(ft >> shift);

        if (diff < 0) {
            diff = -diff;
        }
        result |= (uint64_t)diff << shift;
    }
    return result;
}
720 
/*
 * Sum the eight unsigned bytes of fs and return the total, masked to
 * 16 bits.
 */
uint64_t helper_biadd(uint64_t fs)
{
    unsigned int total = 0;

    /* Consume one byte per iteration; zero high bytes add nothing. */
    for (; fs != 0; fs >>= 8) {
        total += fs & 0xff;
    }
    return total & 0xffff;
}
730 
/*
 * Gather the most significant bit of each of the eight bytes of fs into
 * an 8-bit mask: bit i of the result is bit 7 of byte i (byte 0 least
 * significant).
 */
uint64_t helper_pmovmskb(uint64_t fs)
{
    unsigned int mask = 0;
    unsigned int lane;

    for (lane = 0; lane < 8; lane++) {
        unsigned int top = (fs >> (8 * lane + 7)) & 1;

        mask |= top << lane;
    }
    return mask & 0xff;
}
746