C arm/ecc-384-modp.asm

ifelse(<
   Copyright (C) 2013 Niels Möller

   This file is part of GNU Nettle.

   GNU Nettle is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.

   GNU Nettle is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program. If
   not, see http://www.gnu.org/licenses/.
>)

	.file "ecc-384-modp.asm"
	.arm

C Register roles. RP is the second argument register; T0 aliases r0,
C the first argument register, which is overwritten before it is read.
define(<RP>, <r1>)
define(<T0>, <r0>)
define(<T1>, <r2>)
define(<T2>, <r3>)
define(<T3>, <r4>)
define(<F0>, <r5>)
define(<F1>, <r6>)
define(<F2>, <r7>)
define(<F3>, <r8>)
define(<F4>, <r10>)
define(<N>, <r12>)
define(<H>, <lr>)

	C ecc_384_modp (const struct ecc_modulo *m, mp_limb_t *rp)
	C
	C Works in place on the array of 32-bit limbs at rp: limbs 0-23
	C are read, and only limbs 0-11 are written back.
	C
	C Writing B = 2^32, the section comments below (+B^4, +B^3, -B,
	C +1) show that the high limbs are folded into the low ones
	C using
	C
	C   B^12 = B^4 + B^3 - B + 1   (mod p)
	C
	C Register usage:
	C
	C   RP     pointer into the limb array, moved as the code goes
	C   T0-T3  the group of four limbs currently being updated
	C   F0-F4  folded high limbs, later reused for limbs 16-23 and
	C          for the expanded carry words
	C   N      loop counter
	C   H      signed carry (positive or negative) between groups
	C
	C r4-r8, r10 and lr are saved on the stack and restored on exit;
	C r0, r2, r3 and r12 are used without being saved.
	.text
	.align 2

PROLOGUE(nettle_ecc_384_modp)
	push	{r4,r5,r6,r7,r8,r10,lr}

	add	RP, RP, #80
	ldm	RP, {T0, T1, T2, T3}	C 20-23

	C First get top 4 limbs, which need folding twice, as
	C
	C     T3 T2 T1 T0
	C        T3 T2 T1
	C             -T3
	C ----------------
	C  F4 F3 F2 F1 F0
	C
	C Start with
	C
	C   T3 T1 T0
	C         T1
	C        -T3
	C -----------
	C   F2 F1 F0   Always fits

	adds	F0, T0, T1
	adcs	F1, T1, #0
	adcs	F2, T3, #0
	subs	F0, F0, T3
	sbcs	F1, F1, #0
	sbcs	F2, F2, #0

	C      T3 T2 T2  0
	C         F2 F1 F0
	C  ----------------
	C   F4 F3 F2 F1 F0

	mov	F4, #0
	adds	F1, F1, T2
	adcs	F2, F2, T2
	adcs	F3, T3, #0
	adcs	F4, F4, #0

	C Add in to high part
	C H is not needed here; it is cleared just before its first
	C use, in the low part addition below.
	sub	RP, RP, #32
	ldm	RP, {T0, T1, T2, T3}	C 12-15
	adds	F0, T0, F0
	adcs	F1, T1, F1
	adcs	F2, T2, F2
	adcs	F3, T3, F3
	adcs	F4, F4, #0			C Do F4 later

	C Add to low part, keeping carry (positive or negative) in H
	sub	RP, RP, #48
	ldm	RP, {T0, T1, T2, T3}	C 0-3
	mov	H, #0
	adds	T0, T0, F0
	adcs	T1, T1, F1
	adcs	T2, T2, F2
	adcs	T3, T3, F3
	adc	H, H, #0
	subs	T1, T1, F0
	sbcs	T2, T2, F1
	sbcs	T3, T3, F2
	sbc	H, H, #0
	adds	T3, T3, F0
	adc	H, H, #0

	stm	RP!, {T0,T1,T2,T3}	C 0-3
	mov	N, #2

	C Two iterations. The first updates limbs 4-7 and reloads F0-F3
	C from limbs 16-19, the second updates limbs 8-11 and reloads
	C F0-F3 from limbs 20-23 (RP has advanced by 16 bytes).
.Loop:
	ldm	RP, {T0,T1,T2,T3}	C 4-7

	C First, propagate carry
	adds	T0, T0, H
	asr	H, #31			C Sign extend
	adcs	T1, T1, H
	adcs	T2, T2, H
	adcs	T3, T3, H
	adc	H, H, #0

	C +B^4 term
	adds	T0, T0, F0
	adcs	T1, T1, F1
	adcs	T2, T2, F2
	adcs	T3, T3, F3
	adc	H, H, #0

	C +B^3 terms
	ldr	F0, [RP, #+48]		C 16
	adds	T0, T0, F1
	adcs	T1, T1, F2
	adcs	T2, T2, F3
	adcs	T3, T3, F0
	adc	H, H, #0

	C -B
	ldr	F1, [RP, #+52]		C 17-18
	ldr	F2, [RP, #+56]
	subs	T0, T0, F3
	sbcs	T1, T1, F0
	sbcs	T2, T2, F1
	sbcs	T3, T3, F2
	sbcs	H, H, #0

	C +1
	ldr	F3, [RP, #+60]		C 19
	adds	T0, T0, F0
	adcs	T1, T1, F1
	adcs	T2, T2, F2
	adcs	T3, T3, F3
	adc	H, H, #0
	C The stm below leaves the flags alone, so bne tests the
	C result of this subs.
	subs	N, N, #1
	stm	RP!, {T0,T1,T2,T3}
	bne	.Loop

	C Fold high limbs, we need to add in
	C
	C F4 F4 0 -F4 F4 H H 0 -H H
	C
	C We always have F4 >= 0, but we can have H < 0.
	C Sign extension gets tricky when F4 = 0 and H < 0.
	sub	RP, RP, #48

	ldm	RP, {T0,T1,T2,T3}	C 0-3

	C     H  H  0 -H  H
	C  ----------------
	C  S  H F3 F2 F1 F0
	C
	C Define S = H >> 31 (asr), we then have
	C
	C  F0 = H
	C  F1 = S - H
	C  F2 = - [H > 0]
	C  F3 = H - [H > 0]
	C   H = H + S
	C
	C And we get underflow in S - H iff H > 0

	C				H = 0	H > 0	H = -1
	mov	F0, H		C	0	H	-1
	asr	H, #31
	subs	F1, H, F0	C	0,C=1	-H,C=0	0,C=1
	C F2 - F2 - borrow: the old value of F2 cancels out, leaving
	C 0 or -1 depending on the carry flag only.
	sbc	F2, F2, F2	C	0	-1	0
	sbc	F3, F0, #0	C	0	H-1	-1

	adds	T0, T0, F0
	adcs	T1, T1, F1
	adcs	T2, T2, F2
	adcs	T3, T3, F3
	adc	H, H, F0	C	0+cy	H+cy	-2+cy

	stm	RP!, {T0,T1,T2,T3}	C 0-3
	ldm	RP, {T0,T1,T2,T3}	C 4-7

	C   F4  0 -F4
	C   ---------
	C   F3 F2  F1

	rsbs	F1, F4, #0
	sbc	F2, F2, F2
	sbc	F3, F4, #0

	C Sign extend H
	adds	F0, F4, H
	asr	H, H, #31
	adcs	F1, F1, H
	adcs	F2, F2, H
	adcs	F3, F3, H
	adcs	F4, F4, H
	adc	H, H, #0

	adds	T0, T0, F0
	adcs	T1, T1, F1
	adcs	T2, T2, F2
	adcs	T3, T3, F3

	C stm and ldm leave the flags alone, so the carry chain
	C continues into limbs 8-11.
	stm	RP!, {T0,T1,T2,T3}	C 4-7
	ldm	RP, {T0,T1,T2,T3}	C 8-11

	adcs	T0, T0, F4
	adcs	T1, T1, H
	adcs	T2, T2, H
	adcs	T3, T3, H
	adc	H, H, #0

	C No writeback here: RP stays at limb 8 for the sub below.
	stm	RP, {T0,T1,T2,T3}	C 8-11

	C Final (unlikely) carry
	sub	RP, RP, #32
	ldm	RP, {T0,T1,T2,T3}	C 0-3
	C Fold H into F0-F4
	mov	F0, H
	asr	H, #31
	subs	F1, H, F0
	sbc	F2, F2, F2
	sbc	F3, F0, #0
	C Plain add (no S suffix), so the flags are left alone.
	add	F4, F0, H

	adds	T0, T0, F0
	adcs	T1, T1, F1
	adcs	T2, T2, F2
	adcs	T3, T3, F3

	stm	RP!, {T0,T1,T2,T3}	C 0-3
	ldm	RP, {T0,T1,T2,T3}	C 4-7
	adcs	T0, T0, F4
	adcs	T1, T1, H
	adcs	T2, T2, H
	adcs	T3, T3, H
	stm	RP!, {T0,T1,T2,T3}	C 4-7
	ldm	RP, {T0,T1,T2,T3}	C 8-11
	adcs	T0, T0, H
	adcs	T1, T1, H
	adcs	T2, T2, H
	adcs	T3, T3, H
	stm	RP!, {T0,T1,T2,T3}	C 8-11
	C Restore the saved registers; loading the saved lr value
	C into pc returns to the caller.
	pop	{r4,r5,r6,r7,r8,r10,pc}
EPILOGUE(nettle_ecc_384_modp)