/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * The shared mode code in aes-modes.S (included below) is emitted under a
 * "neon_" symbol prefix via these two macros.
 */
#define AES_ENTRY(func)		ENTRY(neon_ ## func)
#define AES_ENDPROC(func)	ENDPROC(neon_ ## func)

	/* v7 carries the XTS tweak mask for the code in aes-modes.S */
	xtsmask		.req	v7

	/* reload the XTS mask; \tmp is a scratch general purpose register */
	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

	/*
	 * Multiply each byte by the polynomial 'x' in GF(2^8) (the AES
	 * "xtime" operation): shift left by one and conditionally xor with
	 * the reduction polynomial 0x1b supplied in \const.
	 */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7		// 0xff per byte whose top bit was set, else 0x00
	shl		\out, \in, #1		// in * x (mod 2^8)
	and		\temp, \temp, \const	// keep 0x1b only where reduction is needed
	eor		\out, \out, \temp	// fold the reduction term back in
	.endm

	/*
	 * Multiply each byte by the polynomial 'x^2' in GF(2^8): shift left
	 * by two, then reduce by carry-lessly multiplying the two shifted-out
	 * bits by the reduction polynomial in \const.
	 */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6		// the two bits shifted out of each byte
	shl		\out, \in, #2		// in * x^2 (mod 2^8)
	pmul		\temp, \temp, \const	// carry-less multiply by 0x1b
	eor		\out, \out, \temp
	.endm

	/*
	 * Preload the lookup constants:
	 *   v12       - GF(2^8) reduction polynomial 0x1b
	 *   v13       - ShiftRows (or InvShiftRows) byte permutation
	 *   v14       - rotate-each-column-by-8-bits permutation
	 *   v16-v31   - the entire 256-byte (Forward or Reverse) Sbox
	 * \temp is a scratch general purpose register.
	 */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm

	/* do preload for encryption */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		.LForward_Sbox, .LForward_ShiftRows, \temp
	.endm

	/* no per-key state to switch: the Sbox tables are key independent */
	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

	/* do preload for decryption */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		.LReverse_Sbox, .LReverse_ShiftRows, \temp
	.endm

	/*
	 * Apply the SubBytes transformation using the preloaded Sbox.
	 * tbl/tbx can index at most 64 table bytes at a time, so the 256-byte
	 * Sbox in v16-v31 is consulted in four 64-byte slices; 0x40 (held in
	 * v15, set up by the do_block callers) is subtracted from the index
	 * before each successive slice, and tbx leaves already-translated
	 * lanes (now out of range) untouched.
	 */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b	// index - 0x40
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b	// index - 0x80
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b	// index - 0xc0
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/* apply MixColumns transformation */
	/*
	 * Apply the (Inv)MixColumns transformation to the single AES state
	 * in \in. Relies on v12 (0x1b) and v14 (ror-32-by-8 permutation)
	 * loaded by the prepare macro. Clobbers v8 and v9.
	 */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h			// rotate each column by 16 bits
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h		// column rotated by 16 bits
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b	// rotate each column by 8 bits
	eor		\in\().16b, \in\().16b, v8.16b
	.endm

	/*
	 * Run all \rounds AES rounds over the state in \in, using the round
	 * keys at [\rk]. \rkp and \i are scratch general purpose registers.
	 * v15 does double duty: it holds the current round key for the
	 * AddRoundKey eor, and the constant #0x40 needed by sub_bytes in
	 * between key loads. The final round (reached via the beq) skips
	 * MixColumns, as the AES spec requires.
	 */
	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]			// first round key
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40			// constant for sub_bytes
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16		// next round key
	beq		2222f
	mix_columns	\in, \enc
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b	/* ^round key */
	.endm

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the
	 * ones above, but applied to 2 or 4 AES states in parallel.
	 */

	/*
	 * SubBytes on two AES states at once: the same four-slice tbl/tbx
	 * lookup as sub_bytes, with the index arithmetic of the two states
	 * interleaved to hide instruction latency. v15 must hold #0x40
	 * (set up by the do_block_* callers).
	 */
	.macro		sub_bytes_2x, in0, in1
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, v8.16b, v15.16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	sub		v11.16b, v9.16b, v15.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v11.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	.endm

	/*
	 * SubBytes on four AES states at once. Only four scratch registers
	 * (v8-v11) are available, so each one is reused as an index for the
	 * next slice immediately after its tbx consumes it.
	 */
	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/* two-state interleaved variant of mul_by_x */
	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	/* two-state interleaved variant of mul_by_x2 */
	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	/*
	 * (Inv)MixColumns on two AES states in parallel; same algorithm as
	 * mix_columns with the two states interleaved. Clobbers v8-v11.
	 */
	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h			// rotate columns by 16 bits
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b	// rotate each column by 8 bits
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm

	/*
	 * Full AES (de|en)cryption of two states in parallel; same round
	 * structure as do_block (v15 = round key / #0x40 constant, final
	 * round skips MixColumns).
	 */
	.macro		do_block_2x, enc, in0, in1, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_2x	\in0, \in1
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns_2x	\in0, \in1, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	.endm

	/*
	 * Full AES (de|en)cryption of four states in parallel. MixColumns is
	 * done as two invocations of the 2x variant, since it needs four
	 * scratch registers per pair.
	 */
	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm

	.macro		encrypt_block2x, in0, in1, rounds, rk, rkp, i
	do_block_2x	1, \in0, \in1, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block2x, in0, in1, rounds, rk, rkp, i
	do_block_2x	0, \in0, \in1, \rounds, \rk, \rkp, \i
	.endm

	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

#include "aes-modes.S"

	.section	".rodata", "a"
	.align		6
/* the AES forward Sbox (FIPS-197), loaded into v16-v31 by enc_prepare */
.LForward_Sbox:
	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte		0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte		0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	.byte		0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte		0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	.byte		0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte		0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	.byte		0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte		0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	.byte		0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte		0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	.byte		0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte		0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	.byte		0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte		0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	.byte		0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte		0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	.byte		0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte		0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	.byte		0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte		0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	.byte		0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte		0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	.byte		0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte		0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	.byte		0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte		0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	.byte		0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte		0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
	.byte		0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	.byte		0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
	.byte		0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

/* the AES inverse Sbox, loaded into v16-v31 by dec_prepare */
.LReverse_Sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

/* ShiftRows byte permutation for tbl (loaded into v13 by enc_prepare) */
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

/* InvShiftRows byte permutation for tbl (loaded into v13 by dec_prepare) */
.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

/* rotate-each-32-bit-column-by-8-bits permutation (loaded into v14) */
.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201