1C arm/aes-encrypt-internal.asm 2 3ifelse(< 4 Copyright (C) 2013 Niels Möller 5 6 This file is part of GNU Nettle. 7 8 GNU Nettle is free software: you can redistribute it and/or 9 modify it under the terms of either: 10 11 * the GNU Lesser General Public License as published by the Free 12 Software Foundation; either version 3 of the License, or (at your 13 option) any later version. 14 15 or 16 17 * the GNU General Public License as published by the Free 18 Software Foundation; either version 2 of the License, or (at your 19 option) any later version. 20 21 or both in parallel, as here. 22 23 GNU Nettle is distributed in the hope that it will be useful, 24 but WITHOUT ANY WARRANTY; without even the implied warranty of 25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 26 General Public License for more details. 27 28 You should have received copies of the GNU General Public License and 29 the GNU Lesser General Public License along with this program. If 30 not, see http://www.gnu.org/licenses/. 31>) 32 33include_src(<arm/aes.m4>) 34 35C Benchmarked at 725, 815, 990 cycles/block on cortex A9, 36C for 128, 192 and 256 bit key sizes. 37 38C Possible improvements: More efficient load and store with 39C aligned accesses. Better scheduling. 
C Incoming arguments. The first four arrive in r0-r3.
define(<PARAM_ROUNDS>, <r0>)
define(<PARAM_KEYS>, <r1>)
define(<TABLE>, <r2>)
define(<PARAM_LENGTH>, <r3>)
C The last two arguments, DST and SRC, are passed on the stack.

C One set of state words, the scratch register, the round counter and
C the subkey pointer. All of these are saved by the prologue.
define(<W0>, <r4>)
define(<W1>, <r5>)
define(<W2>, <r6>)
define(<W3>, <r7>)
define(<T0>, <r8>)
define(<COUNT>, <r10>)
define(<KEY>, <r11>)

C The byte mask and the second set of state words reuse the argument
C registers (all of them except TABLE), so rounds, keys and length
C are kept in the stack frame rather than in r0, r1 and r3.
define(<MASK>, <r0>)
define(<X0>, <r1>)
define(<X1>, <r3>)
define(<X2>, <r12>)
define(<X3>, <r14>)	C lr

C Frame layout after the prologue has pushed 11 words:
C   sp+0, sp+4, sp+8	copies of r0, r1, r3 (rounds, keys, length)
C   sp+12 ... sp+40	the 8 saved registers
C   sp+44, sp+48	the stack arguments dst and src
define(<FRAME_ROUNDS>,  <[sp]>)
define(<FRAME_KEYS>,  <[sp, #+4]>)
define(<FRAME_LENGTH>,  <[sp, #+8]>)
define(<FRAME_DST>,  <[sp, #+44]>)
define(<FRAME_SRC>,  <[sp, #+48]>)


C AES_ENCRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key)
C
C One table-driven round. Reads the state words x0-x3 and writes the
C new state to w0-w3.
C
C MASK must hold 0x3fc. Combined with the shifts lsl 2, ror 6, ror 14
C and ror 22 it extracts byte 0, 1, 2 and 3 of a word, already scaled
C by 4 to form a word offset into a 1024-byte table.
C
C Each output word is the xor of four lookups, one per table:
C   w0: byte 0 of x0, byte 1 of x1, byte 2 of x2, byte 3 of x3
C   w1: byte 0 of x1, byte 1 of x2, byte 2 of x3, byte 3 of x0
C   w2: byte 0 of x2, byte 1 of x3, byte 2 of x0, byte 3 of x1
C   w3: byte 0 of x3, byte 1 of x0, byte 2 of x1, byte 3 of x2
C
C TABLE is advanced by 1024 before each of the last three groups of
C lookups and moved back by 3072 at the end, so it is unchanged on
C exit. Finally four subkey words are loaded from key, which is
C post-incremented, into x0-x3 and xored into w0-w3. The old contents
C of x0-x3 are therefore lost. T0 is used as scratch.
C
C None of the instructions sets the condition flags, so flags set
C before the macro are still valid after it.
define(<AES_ENCRYPT_ROUND>, <

	and	T0, MASK, $1, lsl #2
	ldr	$5, [TABLE, T0]
	and	T0, MASK, $2, lsl #2
	ldr	$6, [TABLE, T0]
	and	T0, MASK, $3, lsl #2
	ldr	$7, [TABLE, T0]
	and	T0, MASK, $4, lsl #2
	ldr	$8, [TABLE, T0]

	and	T0, MASK, $2, ror #6
	add	TABLE, TABLE, #1024
	ldr	T0, [TABLE, T0]
	eor	$5, $5, T0
	and	T0, MASK, $3, ror #6
	ldr	T0, [TABLE, T0]
	eor	$6, $6, T0
	and	T0, MASK, $4, ror #6
	ldr	T0, [TABLE, T0]
	eor	$7, $7, T0
	and	T0, MASK, $1, ror #6
	ldr	T0, [TABLE, T0]
	eor	$8, $8, T0

	and	T0, MASK, $3, ror #14
	add	TABLE, TABLE, #1024
	ldr	T0, [TABLE, T0]
	eor	$5, $5, T0
	and	T0, MASK, $4, ror #14
	ldr	T0, [TABLE, T0]
	eor	$6, $6, T0
	and	T0, MASK, $1, ror #14
	ldr	T0, [TABLE, T0]
	eor	$7, $7, T0
	and	T0, MASK, $2, ror #14
	ldr	T0, [TABLE, T0]
	eor	$8, $8, T0

	and	T0, MASK, $4, ror #22
	add	TABLE, TABLE, #1024
	ldr	T0, [TABLE, T0]
	eor	$5, $5, T0
	and	T0, MASK, $1, ror #22
	ldr	T0, [TABLE, T0]
	eor	$6, $6, T0
	and	T0, MASK, $2, ror #22
	ldr	T0, [TABLE, T0]
	eor	$7, $7, T0
	and	T0, MASK, $3, ror #22
	ldr	T0, [TABLE, T0]

	ldm	$9!, {$1,$2,$3,$4}
	eor	$8, $8, T0
	sub	TABLE, TABLE, #3072
	eor	$5, $5, $1
	eor	$6, $6, $2
	eor	$7, $7, $3
	eor	$8, $8, $4
>)

	.file "aes-encrypt-internal.asm"

	C _aes_encrypt(unsigned rounds, const uint32_t *keys,
	C	       const struct aes_table *T,
	C	       size_t length, uint8_t *dst,
	C	       uint8_t *src)
	C
	C Handles one 16-byte block per pass of the outer loop, reading
	C from src and writing to dst, until length is used up. Returns
	C at once, touching nothing, when length is zero.
	C
	C The round counter goes down in steps of two and is tested
	C against zero, so rounds is expected to be even and nonzero.
	.text
	ALIGN(4)
PROLOGUE(_nettle_aes_encrypt)
	teq	PARAM_LENGTH, #0
	beq	.Lend

	C Spill rounds, keys and length, then save the registers used
	C below. The symbolic names expand to r0,r1,r3 and
	C r4,r5,r6,r7,r8,r10,r11.
	push	{PARAM_ROUNDS,PARAM_KEYS,PARAM_LENGTH, W0,W1,W2,W3,T0,COUNT,KEY,lr}
	mov	MASK, #0x3fc
	ALIGN(16)
.Lblock_loop:
	C Every block starts over with the spilled round count and
	C subkey pointer. X0 serves as the source pointer for now.
	ldr	X0, FRAME_SRC
	ldm	sp, {COUNT, KEY}

	C AES_LOAD comes from arm/aes.m4. Presumably it reads one input
	C word via X0, combines it with a subkey word via KEY and
	C advances both pointers - confirm against that file.
	AES_LOAD(X0,KEY,W0)
	AES_LOAD(X0,KEY,W1)
	AES_LOAD(X0,KEY,W2)
	AES_LOAD(X0,KEY,W3)

	str	X0, FRAME_SRC

	C Point TABLE at the round tables for the duration of the loop.
	add	TABLE, TABLE, #AES_TABLE0

	C Enter in the middle of the loop: the state is in W0-W3.
	b	.Lentry
	ALIGN(16)
.Lround_loop:
	C	State in X0-X3, result in W0-W3
	AES_ENCRYPT_ROUND(X0, X1, X2, X3, W0, W1, W2, W3, KEY)

.Lentry:
	C The flags set here are tested by the bne after the round.
	C That works because the round macro leaves the flags alone.
	subs	COUNT, COUNT, #2
	C	State in W0-W3, result in X0-X3
	AES_ENCRYPT_ROUND(W0, W1, W2, W3, X0, X1, X2, X3, KEY)

	bne	.Lround_loop

	C COUNT is free now. Give it the byte mask 0xff (0x3fc / 4)
	C that is handed to the final round macro.
	lsr	COUNT, MASK, #2
	C Undo the table offset added before the round loop.
	sub	TABLE, TABLE, #AES_TABLE0
	C Final round. AES_FINAL_ROUND_V5 comes from arm/aes.m4, and
	C each use leaves one output word in W0, W1, W2, W3 in turn.
	AES_FINAL_ROUND_V5(X0, X1, X2, X3, KEY, W0, COUNT)
	AES_FINAL_ROUND_V5(X1, X2, X3, X0, KEY, W1, COUNT)
	AES_FINAL_ROUND_V5(X2, X3, X0, X1, KEY, W2, COUNT)
	AES_FINAL_ROUND_V5(X3, X0, X1, X2, KEY, W3, COUNT)

	C The X registers are free again. X0 becomes the destination
	C pointer and X1 the remaining length.
	ldr	X0, FRAME_DST
	ldr	X1, FRAME_LENGTH

	AES_STORE(X0,W0)
	AES_STORE(X0,W1)
	AES_STORE(X0,W2)
	AES_STORE(X0,W3)

	C The stores in between do not change the flags, so the bhi
	C below still sees the result of this subtraction.
	subs	X1, X1, #16
	str	X0, FRAME_DST
	str	X1, FRAME_LENGTH

	C Unsigned test: go on only if the subtraction did not borrow
	C and something is left.
	bhi	.Lblock_loop

	add	sp, sp, #12	C Discard the copies of r0, r1, r3
	pop	{W0,W1,W2,W3,T0,COUNT,KEY,pc}

.Lend:
	bx	lr
EPILOGUE(_nettle_aes_encrypt)