// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// NOTE(review): this copy rejects 192-bit keys in the key-setup routine and
// has corrected comments. Mirror those changes in the generator before the
// file is regenerated, otherwise they will be lost.
//
// Syntax: GNU/clang ARM (AArch32) assembler, ARM mode (.code 32), run through
// the C preprocessor. AES instructions are emitted as raw .byte sequences with
// the intended mnemonic in the trailing comment.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#include <GFp/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text


.code	32
#undef __thumb2__
.align	5
@ Key-expansion constants, loaded one 16-byte row at a time:
@   row 0: initial round constant 0x01 in every 32-bit lane
@   row 1: vtbl byte indices (rotate the top word and splat it to all lanes)
@   row 2: round constant 0x1b in every 32-bit lane
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

@ ---------------------------------------------------------------------
@ _GFp_aes_hw_set_encrypt_key
@ Expands a 128- or 256-bit AES user key into an encryption key schedule.
@ In:    r0 = user key bytes
@        r1 = key length in bits
@        r2 = key schedule output
@ Out:   r0 = 0 on success
@             -1 if r0 or r2 is NULL
@             -2 if the bit length is anything other than 128 or 256
@ The round keys are written starting at [r2]; the round count (10 or 14)
@ is stored at byte offset 240 of the schedule.
@ Scratch: r1, r3, r12, q0-q3, q8-q10; r0 and r2 are advanced.
@ ---------------------------------------------------------------------
.globl	_GFp_aes_hw_set_encrypt_key
.private_extern	_GFp_aes_hw_set_encrypt_key
#ifdef __thumb2__
.thumb_func	_GFp_aes_hw_set_encrypt_key
#endif
.align	5
_GFp_aes_hw_set_encrypt_key:
Lenc_key:
	mov	r3,#-1			@ provisional result: NULL argument
	cmp	r0,#0
	beq	Lenc_key_abort
	cmp	r2,#0
	beq	Lenc_key_abort
	mov	r3,#-2			@ provisional result: bad bit length
	cmp	r1,#128
	blt	Lenc_key_abort
	cmp	r1,#256
	bgt	Lenc_key_abort
	tst	r1,#0x3f		@ must be a multiple of 64
	bne	Lenc_key_abort
	@ 192 passes the checks above, but there is no 192-bit path any more.
	@ Reject it here, while r3 still holds -2, instead of letting it fall
	@ into L256 and read 32 bytes from a 24-byte key.
	cmp	r1,#192
	beq	Lenc_key_abort

	adr	r3,Lrcon		@ r3 is reused as the constant-table pointer

	veor	q0,q0,q0		@ q0 = 0, used as the aese round key below
	vld1.8	{q3},[r0]!		@ q3 = first 16 key bytes
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = rcon row, q2 = rotate-n-splat indices

	blt	Loop128			@ flags are still those of cmp r1,#192
	@ 192-bit key support was removed.
	b	L256

.align	4
@ 128-bit schedule: eight looped steps (rcon doubled each pass by vshl.u8),
@ then two unrolled steps using the 0x1b row, then the final store.
Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	Loop128

	vld1.32	{q1},[r3]		@ q1 = 0x1b row

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ eleventh round key, no post-increment
	add	r2,r2,#0x50		@ r2 = schedule base + 240

	mov	r12,#10			@ round count for a 128-bit key
	b	Ldone

@ 192-bit key support was removed.

.align	4
@ 256-bit schedule: q3/q8 hold the two most recent round keys. Each pass
@ stores q8, derives and stores the next q3, then derives the next q8.
L256:
	vld1.8	{q8},[r0]		@ q8 = second 16 key bytes
	mov	r1,#7
	mov	r12,#14			@ round count for a 256-bit key
	vst1.32	{q3},[r2]!

Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	Ldone			@ flags from subs above; r2 = base + 240 here

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	Loop256

Ldone:
	str	r12,[r2]		@ round count at schedule offset 240
	mov	r3,#0

Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr

@ ---------------------------------------------------------------------
@ _GFp_aes_hw_encrypt
@ Encrypts a single 16-byte block.
@ In:    r0 = input block
@        r1 = output block
@        r2 = key schedule (round count read from byte offset 240)
@ Scratch: r3, q0-q2; r2 is advanced through the schedule.
@ ---------------------------------------------------------------------
.globl	_GFp_aes_hw_encrypt
.private_extern	_GFp_aes_hw_encrypt
#ifdef __thumb2__
.thumb_func	_GFp_aes_hw_encrypt
#endif
.align	5
_GFp_aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = block being transformed
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

Loop_enc:				@ two rounds per pass, keys alternate q0/q1
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	Loop_enc

.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]		@ last round key
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0		@ final round has no aesmc, only the key xor

	vst1.8	{q2},[r1]
	bx	lr

@ ---------------------------------------------------------------------
@ _GFp_aes_hw_decrypt
@ Decrypts a single 16-byte block; same shape as _GFp_aes_hw_encrypt but
@ built from aesd/aesimc.
@ In:    r0 = input block
@        r1 = output block
@        r2 = key schedule (round count read from byte offset 240)
@ Scratch: r3, q0-q2; r2 is advanced through the schedule.
@ NOTE(review): no routine visible here prepares a decryption schedule;
@ presumably the caller supplies one in the form aesd/aesimc expect.
@ ---------------------------------------------------------------------
.globl	_GFp_aes_hw_decrypt
.private_extern	_GFp_aes_hw_decrypt
#ifdef __thumb2__
.thumb_func	_GFp_aes_hw_decrypt
#endif
.align	5
_GFp_aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = block being transformed
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

Loop_dec:				@ two rounds per pass, keys alternate q0/q1
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	Loop_dec

.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]		@ last round key
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0		@ final round has no aesimc, only the key xor

	vst1.8	{q2},[r1]
	bx	lr

@ ---------------------------------------------------------------------
@ _GFp_aes_hw_ctr32_encrypt_blocks
@ CTR-mode keystream xor over whole 16-byte blocks.
@ In:    r0 = input
@        r1 = output
@        r2 = number of 16-byte blocks
@        r3 = key schedule (round count read from byte offset 240)
@        [sp] = pointer to the 16-byte counter block
@ The 32-bit word at byte offset 12 of the counter block is the counter. It
@ is byte-swapped on non-__ARMEB__ builds, incremented once per block, and
@ swapped back before being inserted into the top lane of q6.
@ Three blocks are processed per Loop3x_ctr32 pass; Lctr32_tail handles a
@ remainder of one or two blocks.
@ Saves and restores r4-r10, lr and d8-d15.
@ NOTE(review): r2 == 0 is not special-cased; it appears to take the tail
@ path and store output. Presumably callers always pass at least one
@ block -- confirm against the callers.
@ ---------------------------------------------------------------------
.globl	_GFp_aes_hw_ctr32_encrypt_blocks
.private_extern	_GFp_aes_hw_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func	_GFp_aes_hw_ctr32_encrypt_blocks
#endif
.align	5
_GFp_aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]		@ r5 = round count

	ldr	r8, [r4, #12]		@ r8 = counter word
	vld1.32	{q0},[r4]		@ q0 = counter block for the first block

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16			@ r12 = input step used by the tail
	cmp	r2,#2			@ flags consumed by movlo and bls below
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]		@ q7 = last round key
	add	r7,r3,#32		@ r7 = third round key
	mov	r6,r5			@ r6 = inner-loop round counter
	movlo	r12,#0			@ fewer than two blocks: tail must not step r0

	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	@ affected by silicon errata #1742098 [0] and #1655431 [1],
	@ respectively, where the second instruction of an aese/aesmc
	@ instruction pair may execute twice if an interrupt is taken right
	@ after the first instruction consumes an input register of which a
	@ single 32-bit lane has been updated the last time it was modified.
	@
	@ This function uses a counter in one 32-bit lane. The vmov.32 writes
	@ could target q1 and q10 directly, but that trips these bugs.
	@ We write to q6 and copy to the final register as a workaround.
	@
	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	r8, r8
#endif
	add	r10, r8, #1
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d13[1],r10
	add	r8, r8, #2
	vorr	q1,q6,q6		@ q1 = counter block + 1
	bls	Lctr32_tail		@ two blocks or fewer (flags from cmp r2,#2)
	rev	r12, r8
	vmov.32	d13[1],r12
	sub	r2,r2,#3		@ bias
	vorr	q10,q6,q6		@ q10 = counter block + 2
	b	Loop3x_ctr32

.align	4
Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	Loop3x_ctr32

	@ Remaining rounds: results move to q4, q5 and q9 so that q0, q1 and
	@ q10 can be refilled with the next three counter blocks meanwhile.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	add	r9,r8,#1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	rev	r9,r9
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3			@ rewind to the start of the schedule
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7		@ fold the last round key into the input
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	@ Note the logic to update q0, q1, and q10 is written to work
	@ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	@ 32-bit mode: the counter lane is written into q6 and the whole
	@ register is then copied with vorr.
	veor	q11,q11,q7
	vmov.32	d13[1], r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q0,q6,q6
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vorr	q1,q6,q6
	vmov.32	d13[1], r12
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q10,q6,q6
	subs	r2,r2,#3		@ flags consumed by bhs below
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	Loop3x_ctr32

	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0, 1 or 2)
	beq	Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ one block left: tail must not step r0

Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12		@ r12 is 0 or 16, see movlo/moveq above
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]		@ with r12 == 0 this re-reads the same block
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	Lctr32_done		@ exactly one block: skip the second store
	vst1.8	{q3},[r1]

Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}

#endif
#endif  // !OPENSSL_NO_ASM