168d19b58SWeiwei Li /* 268d19b58SWeiwei Li * RISC-V Crypto Emulation Helpers for QEMU. 368d19b58SWeiwei Li * 468d19b58SWeiwei Li * Copyright (c) 2021 Ruibo Lu, luruibo2000@163.com 568d19b58SWeiwei Li * Copyright (c) 2021 Zewen Ye, lustrew@foxmail.com 668d19b58SWeiwei Li * 768d19b58SWeiwei Li * This program is free software; you can redistribute it and/or modify it 868d19b58SWeiwei Li * under the terms and conditions of the GNU General Public License, 968d19b58SWeiwei Li * version 2 or later, as published by the Free Software Foundation. 1068d19b58SWeiwei Li * 1168d19b58SWeiwei Li * This program is distributed in the hope it will be useful, but WITHOUT 1268d19b58SWeiwei Li * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1368d19b58SWeiwei Li * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 1468d19b58SWeiwei Li * more details. 1568d19b58SWeiwei Li * 1668d19b58SWeiwei Li * You should have received a copy of the GNU General Public License along with 1768d19b58SWeiwei Li * this program. If not, see <http://www.gnu.org/licenses/>. 1868d19b58SWeiwei Li */ 1968d19b58SWeiwei Li 2068d19b58SWeiwei Li #include "qemu/osdep.h" 2168d19b58SWeiwei Li #include "cpu.h" 2268d19b58SWeiwei Li #include "exec/exec-all.h" 2368d19b58SWeiwei Li #include "exec/helper-proto.h" 2468d19b58SWeiwei Li #include "crypto/aes.h" 25cad26538SRichard Henderson #include "crypto/aes-round.h" 2668d19b58SWeiwei Li #include "crypto/sm4.h" 2768d19b58SWeiwei Li 2868d19b58SWeiwei Li #define AES_XTIME(a) \ 2968d19b58SWeiwei Li ((a << 1) ^ ((a & 0x80) ? 0x1b : 0)) 3068d19b58SWeiwei Li 3168d19b58SWeiwei Li #define AES_GFMUL(a, b) (( \ 3268d19b58SWeiwei Li (((b) & 0x1) ? (a) : 0) ^ \ 3368d19b58SWeiwei Li (((b) & 0x2) ? AES_XTIME(a) : 0) ^ \ 3468d19b58SWeiwei Li (((b) & 0x4) ? AES_XTIME(AES_XTIME(a)) : 0) ^ \ 3568d19b58SWeiwei Li (((b) & 0x8) ? AES_XTIME(AES_XTIME(AES_XTIME(a))) : 0)) & 0xFF) 3668d19b58SWeiwei Li 3768d19b58SWeiwei Li static inline uint32_t aes_mixcolumn_byte(uint8_t x, bool fwd) 3868d19b58SWeiwei Li { 3968d19b58SWeiwei Li uint32_t u; 4068d19b58SWeiwei Li 4168d19b58SWeiwei Li if (fwd) { 4268d19b58SWeiwei Li u = (AES_GFMUL(x, 3) << 24) | (x << 16) | (x << 8) | 4368d19b58SWeiwei Li (AES_GFMUL(x, 2) << 0); 4468d19b58SWeiwei Li } else { 4568d19b58SWeiwei Li u = (AES_GFMUL(x, 0xb) << 24) | (AES_GFMUL(x, 0xd) << 16) | 4668d19b58SWeiwei Li (AES_GFMUL(x, 0x9) << 8) | (AES_GFMUL(x, 0xe) << 0); 4768d19b58SWeiwei Li } 4868d19b58SWeiwei Li return u; 4968d19b58SWeiwei Li } 5068d19b58SWeiwei Li 5168d19b58SWeiwei Li #define sext32_xlen(x) (target_ulong)(int32_t)(x) 5268d19b58SWeiwei Li 5368d19b58SWeiwei Li static inline target_ulong aes32_operation(target_ulong shamt, 5468d19b58SWeiwei Li target_ulong rs1, target_ulong rs2, 5568d19b58SWeiwei Li bool enc, bool mix) 5668d19b58SWeiwei Li { 5768d19b58SWeiwei Li uint8_t si = rs2 >> shamt; 5868d19b58SWeiwei Li uint8_t so; 5968d19b58SWeiwei Li uint32_t mixed; 6068d19b58SWeiwei Li target_ulong res; 6168d19b58SWeiwei Li 6268d19b58SWeiwei Li if (enc) { 6368d19b58SWeiwei Li so = AES_sbox[si]; 6468d19b58SWeiwei Li if (mix) { 6568d19b58SWeiwei Li mixed = aes_mixcolumn_byte(so, true); 6668d19b58SWeiwei Li } else { 6768d19b58SWeiwei Li mixed = so; 6868d19b58SWeiwei Li } 6968d19b58SWeiwei Li } else { 7068d19b58SWeiwei Li so = AES_isbox[si]; 7168d19b58SWeiwei Li if (mix) { 7268d19b58SWeiwei Li mixed = aes_mixcolumn_byte(so, false); 7368d19b58SWeiwei Li } else { 7468d19b58SWeiwei Li mixed = so; 7568d19b58SWeiwei Li } 7668d19b58SWeiwei Li } 7768d19b58SWeiwei Li mixed = rol32(mixed, shamt); 7868d19b58SWeiwei Li res = rs1 ^ mixed; 7968d19b58SWeiwei Li 8068d19b58SWeiwei Li return sext32_xlen(res); 8168d19b58SWeiwei Li } 8268d19b58SWeiwei Li 8368d19b58SWeiwei Li target_ulong HELPER(aes32esmi)(target_ulong rs1, target_ulong rs2, 8468d19b58SWeiwei Li target_ulong shamt) 8568d19b58SWeiwei Li { 8668d19b58SWeiwei Li return aes32_operation(shamt, rs1, rs2, true, true); 8768d19b58SWeiwei Li } 8868d19b58SWeiwei Li 8968d19b58SWeiwei Li target_ulong HELPER(aes32esi)(target_ulong rs1, target_ulong rs2, 9068d19b58SWeiwei Li target_ulong shamt) 9168d19b58SWeiwei Li { 9268d19b58SWeiwei Li return aes32_operation(shamt, rs1, rs2, true, false); 9368d19b58SWeiwei Li } 9468d19b58SWeiwei Li 9568d19b58SWeiwei Li target_ulong HELPER(aes32dsmi)(target_ulong rs1, target_ulong rs2, 9668d19b58SWeiwei Li target_ulong shamt) 9768d19b58SWeiwei Li { 9868d19b58SWeiwei Li return aes32_operation(shamt, rs1, rs2, false, true); 9968d19b58SWeiwei Li } 10068d19b58SWeiwei Li 10168d19b58SWeiwei Li target_ulong HELPER(aes32dsi)(target_ulong rs1, target_ulong rs2, 10268d19b58SWeiwei Li target_ulong shamt) 10368d19b58SWeiwei Li { 10468d19b58SWeiwei Li return aes32_operation(shamt, rs1, rs2, false, false); 10568d19b58SWeiwei Li } 1069e33e175SWeiwei Li 1079e33e175SWeiwei Li #define BY(X, I) ((X >> (8 * I)) & 0xFF) 1089e33e175SWeiwei Li 1099e33e175SWeiwei Li #define AES_SHIFROWS_LO(RS1, RS2) ( \ 1109e33e175SWeiwei Li (((RS1 >> 24) & 0xFF) << 56) | (((RS2 >> 48) & 0xFF) << 48) | \ 1119e33e175SWeiwei Li (((RS2 >> 8) & 0xFF) << 40) | (((RS1 >> 32) & 0xFF) << 32) | \ 1129e33e175SWeiwei Li (((RS2 >> 56) & 0xFF) << 24) | (((RS2 >> 16) & 0xFF) << 16) | \ 1139e33e175SWeiwei Li (((RS1 >> 40) & 0xFF) << 8) | (((RS1 >> 0) & 0xFF) << 0)) 1149e33e175SWeiwei Li 1159e33e175SWeiwei Li #define AES_INVSHIFROWS_LO(RS1, RS2) ( \ 1169e33e175SWeiwei Li (((RS2 >> 24) & 0xFF) << 56) | (((RS2 >> 48) & 0xFF) << 48) | \ 1179e33e175SWeiwei Li (((RS1 >> 8) & 0xFF) << 40) | (((RS1 >> 32) & 0xFF) << 32) | \ 1189e33e175SWeiwei Li (((RS1 >> 56) & 0xFF) << 24) | (((RS2 >> 16) & 0xFF) << 16) | \ 1199e33e175SWeiwei Li (((RS2 >> 40) & 0xFF) << 8) | (((RS1 >> 0) & 0xFF) << 0)) 1209e33e175SWeiwei Li 1219e33e175SWeiwei Li #define AES_MIXBYTE(COL, B0, B1, B2, B3) ( \ 1229e33e175SWeiwei Li BY(COL, B3) ^ BY(COL, B2) ^ AES_GFMUL(BY(COL, B1), 3) ^ \ 1239e33e175SWeiwei Li AES_GFMUL(BY(COL, B0), 2)) 1249e33e175SWeiwei Li 1259e33e175SWeiwei Li #define AES_MIXCOLUMN(COL) ( \ 1269e33e175SWeiwei Li AES_MIXBYTE(COL, 3, 0, 1, 2) << 24 | \ 1279e33e175SWeiwei Li AES_MIXBYTE(COL, 2, 3, 0, 1) << 16 | \ 1289e33e175SWeiwei Li AES_MIXBYTE(COL, 1, 2, 3, 0) << 8 | AES_MIXBYTE(COL, 0, 1, 2, 3) << 0) 1299e33e175SWeiwei Li 1309e33e175SWeiwei Li #define AES_INVMIXBYTE(COL, B0, B1, B2, B3) ( \ 1319e33e175SWeiwei Li AES_GFMUL(BY(COL, B3), 0x9) ^ AES_GFMUL(BY(COL, B2), 0xd) ^ \ 1329e33e175SWeiwei Li AES_GFMUL(BY(COL, B1), 0xb) ^ AES_GFMUL(BY(COL, B0), 0xe)) 1339e33e175SWeiwei Li 1349e33e175SWeiwei Li #define AES_INVMIXCOLUMN(COL) ( \ 1359e33e175SWeiwei Li AES_INVMIXBYTE(COL, 3, 0, 1, 2) << 24 | \ 1369e33e175SWeiwei Li AES_INVMIXBYTE(COL, 2, 3, 0, 1) << 16 | \ 1379e33e175SWeiwei Li AES_INVMIXBYTE(COL, 1, 2, 3, 0) << 8 | \ 1389e33e175SWeiwei Li AES_INVMIXBYTE(COL, 0, 1, 2, 3) << 0) 1399e33e175SWeiwei Li 140cad26538SRichard Henderson static const AESState aes_zero = { }; 141cad26538SRichard Henderson 1429e33e175SWeiwei Li static inline target_ulong aes64_operation(target_ulong rs1, target_ulong rs2, 1439e33e175SWeiwei Li bool enc, bool mix) 1449e33e175SWeiwei Li { 1459e33e175SWeiwei Li uint64_t RS1 = rs1; 1469e33e175SWeiwei Li uint64_t RS2 = rs2; 1479e33e175SWeiwei Li uint64_t result; 1489e33e175SWeiwei Li uint64_t temp; 1499e33e175SWeiwei Li uint32_t col_0; 1509e33e175SWeiwei Li uint32_t col_1; 1519e33e175SWeiwei Li 1529e33e175SWeiwei Li if (enc) { 1539e33e175SWeiwei Li temp = AES_SHIFROWS_LO(RS1, RS2); 1549e33e175SWeiwei Li temp = (((uint64_t)AES_sbox[(temp >> 0) & 0xFF] << 0) | 1559e33e175SWeiwei Li ((uint64_t)AES_sbox[(temp >> 8) & 0xFF] << 8) | 1569e33e175SWeiwei Li ((uint64_t)AES_sbox[(temp >> 16) & 0xFF] << 16) | 1579e33e175SWeiwei Li ((uint64_t)AES_sbox[(temp >> 24) & 0xFF] << 24) | 1589e33e175SWeiwei Li ((uint64_t)AES_sbox[(temp >> 32) & 0xFF] << 32) | 1599e33e175SWeiwei Li ((uint64_t)AES_sbox[(temp >> 40) & 0xFF] << 40) | 1609e33e175SWeiwei Li ((uint64_t)AES_sbox[(temp >> 48) & 0xFF] << 48) | 1619e33e175SWeiwei Li ((uint64_t)AES_sbox[(temp >> 56) & 0xFF] << 56)); 1629e33e175SWeiwei Li if (mix) { 1639e33e175SWeiwei Li col_0 = temp & 0xFFFFFFFF; 1649e33e175SWeiwei Li col_1 = temp >> 32; 1659e33e175SWeiwei Li 1669e33e175SWeiwei Li col_0 = AES_MIXCOLUMN(col_0); 1679e33e175SWeiwei Li col_1 = AES_MIXCOLUMN(col_1); 1689e33e175SWeiwei Li 1699e33e175SWeiwei Li result = ((uint64_t)col_1 << 32) | col_0; 1709e33e175SWeiwei Li } else { 1719e33e175SWeiwei Li result = temp; 1729e33e175SWeiwei Li } 1739e33e175SWeiwei Li } else { 1749e33e175SWeiwei Li temp = AES_INVSHIFROWS_LO(RS1, RS2); 1759e33e175SWeiwei Li temp = (((uint64_t)AES_isbox[(temp >> 0) & 0xFF] << 0) | 1769e33e175SWeiwei Li ((uint64_t)AES_isbox[(temp >> 8) & 0xFF] << 8) | 1779e33e175SWeiwei Li ((uint64_t)AES_isbox[(temp >> 16) & 0xFF] << 16) | 1789e33e175SWeiwei Li ((uint64_t)AES_isbox[(temp >> 24) & 0xFF] << 24) | 1799e33e175SWeiwei Li ((uint64_t)AES_isbox[(temp >> 32) & 0xFF] << 32) | 1809e33e175SWeiwei Li ((uint64_t)AES_isbox[(temp >> 40) & 0xFF] << 40) | 1819e33e175SWeiwei Li ((uint64_t)AES_isbox[(temp >> 48) & 0xFF] << 48) | 1829e33e175SWeiwei Li ((uint64_t)AES_isbox[(temp >> 56) & 0xFF] << 56)); 1839e33e175SWeiwei Li if (mix) { 1849e33e175SWeiwei Li col_0 = temp & 0xFFFFFFFF; 1859e33e175SWeiwei Li col_1 = temp >> 32; 1869e33e175SWeiwei Li 1879e33e175SWeiwei Li col_0 = AES_INVMIXCOLUMN(col_0); 1889e33e175SWeiwei Li col_1 = AES_INVMIXCOLUMN(col_1); 1899e33e175SWeiwei Li 1909e33e175SWeiwei Li result = ((uint64_t)col_1 << 32) | col_0; 1919e33e175SWeiwei Li } else { 1929e33e175SWeiwei Li result = temp; 1939e33e175SWeiwei Li } 1949e33e175SWeiwei Li } 1959e33e175SWeiwei Li 1969e33e175SWeiwei Li return result; 1979e33e175SWeiwei Li } 1989e33e175SWeiwei Li 1999e33e175SWeiwei Li target_ulong HELPER(aes64esm)(target_ulong rs1, target_ulong rs2) 2009e33e175SWeiwei Li { 2019e33e175SWeiwei Li return aes64_operation(rs1, rs2, true, true); 2029e33e175SWeiwei Li } 2039e33e175SWeiwei Li 2049e33e175SWeiwei Li target_ulong HELPER(aes64es)(target_ulong rs1, target_ulong rs2) 2059e33e175SWeiwei Li { 206cad26538SRichard Henderson AESState t; 207cad26538SRichard Henderson 208cad26538SRichard Henderson t.d[HOST_BIG_ENDIAN] = rs1; 209cad26538SRichard Henderson t.d[!HOST_BIG_ENDIAN] = rs2; 210cad26538SRichard Henderson aesenc_SB_SR_AK(&t, &t, &aes_zero, false); 211cad26538SRichard Henderson return t.d[HOST_BIG_ENDIAN]; 2129e33e175SWeiwei Li } 2139e33e175SWeiwei Li 2149e33e175SWeiwei Li target_ulong HELPER(aes64ds)(target_ulong rs1, target_ulong rs2) 2159e33e175SWeiwei Li { 2167a70583aSRichard Henderson AESState t; 2177a70583aSRichard Henderson 2187a70583aSRichard Henderson t.d[HOST_BIG_ENDIAN] = rs1; 2197a70583aSRichard Henderson t.d[!HOST_BIG_ENDIAN] = rs2; 2207a70583aSRichard Henderson aesdec_ISB_ISR_AK(&t, &t, &aes_zero, false); 2217a70583aSRichard Henderson return t.d[HOST_BIG_ENDIAN]; 2229e33e175SWeiwei Li } 2239e33e175SWeiwei Li 2249e33e175SWeiwei Li target_ulong HELPER(aes64dsm)(target_ulong rs1, target_ulong rs2) 2259e33e175SWeiwei Li { 2269e33e175SWeiwei Li return aes64_operation(rs1, rs2, false, true); 2279e33e175SWeiwei Li } 2289e33e175SWeiwei Li 2299e33e175SWeiwei Li target_ulong HELPER(aes64ks2)(target_ulong rs1, target_ulong rs2) 2309e33e175SWeiwei Li { 2319e33e175SWeiwei Li uint64_t RS1 = rs1; 2329e33e175SWeiwei Li uint64_t RS2 = rs2; 2339e33e175SWeiwei Li uint32_t rs1_hi = RS1 >> 32; 2349e33e175SWeiwei Li uint32_t rs2_lo = RS2; 2359e33e175SWeiwei Li uint32_t rs2_hi = RS2 >> 32; 2369e33e175SWeiwei Li 2379e33e175SWeiwei Li uint32_t r_lo = (rs1_hi ^ rs2_lo); 2389e33e175SWeiwei Li uint32_t r_hi = (rs1_hi ^ rs2_lo ^ rs2_hi); 2399e33e175SWeiwei Li target_ulong result = ((uint64_t)r_hi << 32) | r_lo; 2409e33e175SWeiwei Li 2419e33e175SWeiwei Li return result; 2429e33e175SWeiwei Li } 2439e33e175SWeiwei Li 2449e33e175SWeiwei Li target_ulong HELPER(aes64ks1i)(target_ulong rs1, target_ulong rnum) 2459e33e175SWeiwei Li { 2469e33e175SWeiwei Li uint64_t RS1 = rs1; 2479e33e175SWeiwei Li static const uint8_t round_consts[10] = { 2489e33e175SWeiwei Li 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 2499e33e175SWeiwei Li }; 2509e33e175SWeiwei Li 2519e33e175SWeiwei Li uint8_t enc_rnum = rnum; 2529e33e175SWeiwei Li uint32_t temp = (RS1 >> 32) & 0xFFFFFFFF; 2539e33e175SWeiwei Li uint8_t rcon_ = 0; 2549e33e175SWeiwei Li target_ulong result; 2559e33e175SWeiwei Li 2569e33e175SWeiwei Li if (enc_rnum != 0xA) { 2579e33e175SWeiwei Li temp = ror32(temp, 8); /* Rotate right by 8 */ 2589e33e175SWeiwei Li rcon_ = round_consts[enc_rnum]; 2599e33e175SWeiwei Li } 2609e33e175SWeiwei Li 2619e33e175SWeiwei Li temp = ((uint32_t)AES_sbox[(temp >> 24) & 0xFF] << 24) | 2629e33e175SWeiwei Li ((uint32_t)AES_sbox[(temp >> 16) & 0xFF] << 16) | 2639e33e175SWeiwei Li ((uint32_t)AES_sbox[(temp >> 8) & 0xFF] << 8) | 2649e33e175SWeiwei Li ((uint32_t)AES_sbox[(temp >> 0) & 0xFF] << 0); 2659e33e175SWeiwei Li 2669e33e175SWeiwei Li temp ^= rcon_; 2679e33e175SWeiwei Li 2689e33e175SWeiwei Li result = ((uint64_t)temp << 32) | temp; 2699e33e175SWeiwei Li 2709e33e175SWeiwei Li return result; 2719e33e175SWeiwei Li } 2729e33e175SWeiwei Li 2739e33e175SWeiwei Li target_ulong HELPER(aes64im)(target_ulong rs1) 2749e33e175SWeiwei Li { 275*607a5f9dSRichard Henderson AESState t; 2769e33e175SWeiwei Li 277*607a5f9dSRichard Henderson t.d[HOST_BIG_ENDIAN] = rs1; 278*607a5f9dSRichard Henderson t.d[!HOST_BIG_ENDIAN] = 0; 279*607a5f9dSRichard Henderson aesdec_IMC(&t, &t, false); 280*607a5f9dSRichard Henderson return t.d[HOST_BIG_ENDIAN]; 2819e33e175SWeiwei Li } 2820976083dSWeiwei Li 2830976083dSWeiwei Li target_ulong HELPER(sm4ed)(target_ulong rs1, target_ulong rs2, 2840976083dSWeiwei Li target_ulong shamt) 2850976083dSWeiwei Li { 2860976083dSWeiwei Li uint32_t sb_in = (uint8_t)(rs2 >> shamt); 2870976083dSWeiwei Li uint32_t sb_out = (uint32_t)sm4_sbox[sb_in]; 2880976083dSWeiwei Li 2890976083dSWeiwei Li uint32_t x = sb_out ^ (sb_out << 8) ^ (sb_out << 2) ^ (sb_out << 18) ^ 2900976083dSWeiwei Li ((sb_out & 0x3f) << 26) ^ ((sb_out & 0xC0) << 10); 2910976083dSWeiwei Li 2920976083dSWeiwei Li uint32_t rotl = rol32(x, shamt); 2930976083dSWeiwei Li 2940976083dSWeiwei Li return sext32_xlen(rotl ^ (uint32_t)rs1); 2950976083dSWeiwei Li } 2960976083dSWeiwei Li 2970976083dSWeiwei Li target_ulong HELPER(sm4ks)(target_ulong rs1, target_ulong rs2, 2980976083dSWeiwei Li target_ulong shamt) 2990976083dSWeiwei Li { 3000976083dSWeiwei Li uint32_t sb_in = (uint8_t)(rs2 >> shamt); 3010976083dSWeiwei Li uint32_t sb_out = sm4_sbox[sb_in]; 3020976083dSWeiwei Li 3030976083dSWeiwei Li uint32_t x = sb_out ^ ((sb_out & 0x07) << 29) ^ ((sb_out & 0xFE) << 7) ^ 3040976083dSWeiwei Li ((sb_out & 0x01) << 23) ^ ((sb_out & 0xF8) << 13); 3050976083dSWeiwei Li 3060976083dSWeiwei Li uint32_t rotl = rol32(x, shamt); 3070976083dSWeiwei Li 3080976083dSWeiwei Li return sext32_xlen(rotl ^ (uint32_t)rs1); 3090976083dSWeiwei Li } 31068d19b58SWeiwei Li #undef sext32_xlen 311