1bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from sha512-armv8.pl. */ 2bc3d5698SJohn Baldwin// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. 3bc3d5698SJohn Baldwin// 4c0855eaaSJohn Baldwin// Licensed under the Apache License 2.0 (the "License"). You may not use 5bc3d5698SJohn Baldwin// this file except in compliance with the License. You can obtain a copy 6bc3d5698SJohn Baldwin// in the file LICENSE in the source distribution or at 7bc3d5698SJohn Baldwin// https://www.openssl.org/source/license.html 8bc3d5698SJohn Baldwin 9bc3d5698SJohn Baldwin// ==================================================================== 10bc3d5698SJohn Baldwin// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 11bc3d5698SJohn Baldwin// project. The module is, however, dual licensed under OpenSSL and 12bc3d5698SJohn Baldwin// CRYPTOGAMS licenses depending on where you obtain it. For further 13bc3d5698SJohn Baldwin// details see http://www.openssl.org/~appro/cryptogams/. 14bc3d5698SJohn Baldwin// 15bc3d5698SJohn Baldwin// Permission to use under GPLv2 terms is granted. 16bc3d5698SJohn Baldwin// ==================================================================== 17bc3d5698SJohn Baldwin// 18bc3d5698SJohn Baldwin// SHA256/512 for ARMv8. 
19bc3d5698SJohn Baldwin// 20bc3d5698SJohn Baldwin// Performance in cycles per processed byte and improvement coefficient 21bc3d5698SJohn Baldwin// over code generated with "default" compiler: 22bc3d5698SJohn Baldwin// 23bc3d5698SJohn Baldwin// SHA256-hw SHA256(*) SHA512 24bc3d5698SJohn Baldwin// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) 25bc3d5698SJohn Baldwin// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) 26bc3d5698SJohn Baldwin// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) 27bc3d5698SJohn Baldwin// Denver 2.01 10.5 (+26%) 6.70 (+8%) 28bc3d5698SJohn Baldwin// X-Gene 20.0 (+100%) 12.8 (+300%(***)) 29bc3d5698SJohn Baldwin// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) 30bc3d5698SJohn Baldwin// Kryo 1.92 17.4 (+30%) 11.2 (+8%) 31c0855eaaSJohn Baldwin// ThunderX2 2.54 13.2 (+40%) 8.40 (+18%) 32bc3d5698SJohn Baldwin// 33bc3d5698SJohn Baldwin// (*) Software SHA256 results are of lesser relevance, presented 34bc3d5698SJohn Baldwin// mostly for informational purposes. 35bc3d5698SJohn Baldwin// (**) The result is a trade-off: it's possible to improve it by 36bc3d5698SJohn Baldwin// 10% (or by 1 cycle per round), but at the cost of 20% loss 37bc3d5698SJohn Baldwin// on Cortex-A53 (or by 4 cycles per round). 38bc3d5698SJohn Baldwin// (***) Super-impressive coefficients over gcc-generated code are 39bc3d5698SJohn Baldwin// indication of some compiler "pathology", most notably code 40bc3d5698SJohn Baldwin// generated with -mgeneral-regs-only is significantly faster 41bc3d5698SJohn Baldwin// and the gap is only 40-90%. 42bc3d5698SJohn Baldwin// 43bc3d5698SJohn Baldwin// October 2016. 44bc3d5698SJohn Baldwin// 45bc3d5698SJohn Baldwin// Originally it was reckoned that it makes no sense to implement NEON 46bc3d5698SJohn Baldwin// version of SHA256 for 64-bit processors. This is because performance 47bc3d5698SJohn Baldwin// improvement on most wide-spread Cortex-A5x processors was observed 48bc3d5698SJohn Baldwin// to be marginal, same on Cortex-A53 and ~10% on A57. 
But then it was 49bc3d5698SJohn Baldwin// observed that 32-bit NEON SHA256 performs significantly better than 50bc3d5698SJohn Baldwin// 64-bit scalar version on *some* of the more recent processors. As 51bc3d5698SJohn Baldwin// result 64-bit NEON version of SHA256 was added to provide best 52bc3d5698SJohn Baldwin// all-round performance. For example it executes ~30% faster on X-Gene 53bc3d5698SJohn Baldwin// and Mongoose. [For reference, NEON version of SHA512 is bound to 54bc3d5698SJohn Baldwin// deliver much less improvement, likely *negative* on Cortex-A5x. 55bc3d5698SJohn Baldwin// Which is why NEON support is limited to SHA256.] 56bc3d5698SJohn Baldwin 57c0855eaaSJohn Baldwin// $output is the last argument if it looks like a file (it has an extension) 58c0855eaaSJohn Baldwin// $flavour is the first argument if it doesn't look like a file 59bc3d5698SJohn Baldwin#include "arm_arch.h" 60bd9588bcSAndrew Turner#ifndef __KERNEL__ 61c0855eaaSJohn Baldwin 62c0855eaaSJohn Baldwin.hidden OPENSSL_armcap_P 63bc3d5698SJohn Baldwin#endif 64bc3d5698SJohn Baldwin 65bc3d5698SJohn Baldwin.text 66bc3d5698SJohn Baldwin 67bc3d5698SJohn Baldwin.globl sha256_block_data_order 68bc3d5698SJohn Baldwin.type sha256_block_data_order,%function 69bc3d5698SJohn Baldwin.align 6 70bc3d5698SJohn Baldwinsha256_block_data_order: 71bd9588bcSAndrew Turner AARCH64_VALID_CALL_TARGET 72bc3d5698SJohn Baldwin#ifndef __KERNEL__ 73c0855eaaSJohn Baldwin adrp x16,OPENSSL_armcap_P 74c0855eaaSJohn Baldwin ldr w16,[x16,#:lo12:OPENSSL_armcap_P] 75bc3d5698SJohn Baldwin tst w16,#ARMV8_SHA256 76bc3d5698SJohn Baldwin b.ne .Lv8_entry 77bc3d5698SJohn Baldwin tst w16,#ARMV7_NEON 78bc3d5698SJohn Baldwin b.ne .Lneon_entry 79bc3d5698SJohn Baldwin#endif 80bd9588bcSAndrew Turner AARCH64_SIGN_LINK_REGISTER 81bc3d5698SJohn Baldwin stp x29,x30,[sp,#-128]! 
82bc3d5698SJohn Baldwin add x29,sp,#0 83bc3d5698SJohn Baldwin 84bc3d5698SJohn Baldwin stp x19,x20,[sp,#16] 85bc3d5698SJohn Baldwin stp x21,x22,[sp,#32] 86bc3d5698SJohn Baldwin stp x23,x24,[sp,#48] 87bc3d5698SJohn Baldwin stp x25,x26,[sp,#64] 88bc3d5698SJohn Baldwin stp x27,x28,[sp,#80] 89bc3d5698SJohn Baldwin sub sp,sp,#4*4 90bc3d5698SJohn Baldwin 91bc3d5698SJohn Baldwin ldp w20,w21,[x0] // load context 92bc3d5698SJohn Baldwin ldp w22,w23,[x0,#2*4] 93bc3d5698SJohn Baldwin ldp w24,w25,[x0,#4*4] 94bc3d5698SJohn Baldwin add x2,x1,x2,lsl#6 // end of input 95bc3d5698SJohn Baldwin ldp w26,w27,[x0,#6*4] 96bc3d5698SJohn Baldwin adr x30,.LK256 97bc3d5698SJohn Baldwin stp x0,x2,[x29,#96] 98bc3d5698SJohn Baldwin 99bc3d5698SJohn Baldwin.Loop: 100bc3d5698SJohn Baldwin ldp w3,w4,[x1],#2*4 101bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++ 102bc3d5698SJohn Baldwin eor w28,w21,w22 // magic seed 103bc3d5698SJohn Baldwin str x1,[x29,#112] 104bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 105bc3d5698SJohn Baldwin rev w3,w3 // 0 106bc3d5698SJohn Baldwin#endif 107bc3d5698SJohn Baldwin ror w16,w24,#6 108bc3d5698SJohn Baldwin add w27,w27,w19 // h+=K[i] 109bc3d5698SJohn Baldwin eor w6,w24,w24,ror#14 110bc3d5698SJohn Baldwin and w17,w25,w24 111bc3d5698SJohn Baldwin bic w19,w26,w24 112bc3d5698SJohn Baldwin add w27,w27,w3 // h+=X[i] 113bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 114bc3d5698SJohn Baldwin eor w19,w20,w21 // a^b, b^c in next round 115bc3d5698SJohn Baldwin eor w16,w16,w6,ror#11 // Sigma1(e) 116bc3d5698SJohn Baldwin ror w6,w20,#2 117bc3d5698SJohn Baldwin add w27,w27,w17 // h+=Ch(e,f,g) 118bc3d5698SJohn Baldwin eor w17,w20,w20,ror#9 119bc3d5698SJohn Baldwin add w27,w27,w16 // h+=Sigma1(e) 120bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 121bc3d5698SJohn Baldwin add w23,w23,w27 // d+=h 122bc3d5698SJohn Baldwin eor w28,w28,w21 // Maj(a,b,c) 123bc3d5698SJohn Baldwin eor w17,w6,w17,ror#13 // Sigma0(a) 124bc3d5698SJohn Baldwin add w27,w27,w28 // h+=Maj(a,b,c) 125bc3d5698SJohn 
Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 126bc3d5698SJohn Baldwin //add w27,w27,w17 // h+=Sigma0(a) 127bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 128bc3d5698SJohn Baldwin rev w4,w4 // 1 129bc3d5698SJohn Baldwin#endif 130bc3d5698SJohn Baldwin ldp w5,w6,[x1],#2*4 131bc3d5698SJohn Baldwin add w27,w27,w17 // h+=Sigma0(a) 132bc3d5698SJohn Baldwin ror w16,w23,#6 133bc3d5698SJohn Baldwin add w26,w26,w28 // h+=K[i] 134bc3d5698SJohn Baldwin eor w7,w23,w23,ror#14 135bc3d5698SJohn Baldwin and w17,w24,w23 136bc3d5698SJohn Baldwin bic w28,w25,w23 137bc3d5698SJohn Baldwin add w26,w26,w4 // h+=X[i] 138bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 139bc3d5698SJohn Baldwin eor w28,w27,w20 // a^b, b^c in next round 140bc3d5698SJohn Baldwin eor w16,w16,w7,ror#11 // Sigma1(e) 141bc3d5698SJohn Baldwin ror w7,w27,#2 142bc3d5698SJohn Baldwin add w26,w26,w17 // h+=Ch(e,f,g) 143bc3d5698SJohn Baldwin eor w17,w27,w27,ror#9 144bc3d5698SJohn Baldwin add w26,w26,w16 // h+=Sigma1(e) 145bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 146bc3d5698SJohn Baldwin add w22,w22,w26 // d+=h 147bc3d5698SJohn Baldwin eor w19,w19,w20 // Maj(a,b,c) 148bc3d5698SJohn Baldwin eor w17,w7,w17,ror#13 // Sigma0(a) 149bc3d5698SJohn Baldwin add w26,w26,w19 // h+=Maj(a,b,c) 150bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 151bc3d5698SJohn Baldwin //add w26,w26,w17 // h+=Sigma0(a) 152bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 153bc3d5698SJohn Baldwin rev w5,w5 // 2 154bc3d5698SJohn Baldwin#endif 155bc3d5698SJohn Baldwin add w26,w26,w17 // h+=Sigma0(a) 156bc3d5698SJohn Baldwin ror w16,w22,#6 157bc3d5698SJohn Baldwin add w25,w25,w19 // h+=K[i] 158bc3d5698SJohn Baldwin eor w8,w22,w22,ror#14 159bc3d5698SJohn Baldwin and w17,w23,w22 160bc3d5698SJohn Baldwin bic w19,w24,w22 161bc3d5698SJohn Baldwin add w25,w25,w5 // h+=X[i] 162bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 163bc3d5698SJohn Baldwin eor w19,w26,w27 // a^b, b^c in next round 164bc3d5698SJohn Baldwin eor 
w16,w16,w8,ror#11 // Sigma1(e) 165bc3d5698SJohn Baldwin ror w8,w26,#2 166bc3d5698SJohn Baldwin add w25,w25,w17 // h+=Ch(e,f,g) 167bc3d5698SJohn Baldwin eor w17,w26,w26,ror#9 168bc3d5698SJohn Baldwin add w25,w25,w16 // h+=Sigma1(e) 169bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 170bc3d5698SJohn Baldwin add w21,w21,w25 // d+=h 171bc3d5698SJohn Baldwin eor w28,w28,w27 // Maj(a,b,c) 172bc3d5698SJohn Baldwin eor w17,w8,w17,ror#13 // Sigma0(a) 173bc3d5698SJohn Baldwin add w25,w25,w28 // h+=Maj(a,b,c) 174bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 175bc3d5698SJohn Baldwin //add w25,w25,w17 // h+=Sigma0(a) 176bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 177bc3d5698SJohn Baldwin rev w6,w6 // 3 178bc3d5698SJohn Baldwin#endif 179bc3d5698SJohn Baldwin ldp w7,w8,[x1],#2*4 180bc3d5698SJohn Baldwin add w25,w25,w17 // h+=Sigma0(a) 181bc3d5698SJohn Baldwin ror w16,w21,#6 182bc3d5698SJohn Baldwin add w24,w24,w28 // h+=K[i] 183bc3d5698SJohn Baldwin eor w9,w21,w21,ror#14 184bc3d5698SJohn Baldwin and w17,w22,w21 185bc3d5698SJohn Baldwin bic w28,w23,w21 186bc3d5698SJohn Baldwin add w24,w24,w6 // h+=X[i] 187bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 188bc3d5698SJohn Baldwin eor w28,w25,w26 // a^b, b^c in next round 189bc3d5698SJohn Baldwin eor w16,w16,w9,ror#11 // Sigma1(e) 190bc3d5698SJohn Baldwin ror w9,w25,#2 191bc3d5698SJohn Baldwin add w24,w24,w17 // h+=Ch(e,f,g) 192bc3d5698SJohn Baldwin eor w17,w25,w25,ror#9 193bc3d5698SJohn Baldwin add w24,w24,w16 // h+=Sigma1(e) 194bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 195bc3d5698SJohn Baldwin add w20,w20,w24 // d+=h 196bc3d5698SJohn Baldwin eor w19,w19,w26 // Maj(a,b,c) 197bc3d5698SJohn Baldwin eor w17,w9,w17,ror#13 // Sigma0(a) 198bc3d5698SJohn Baldwin add w24,w24,w19 // h+=Maj(a,b,c) 199bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 200bc3d5698SJohn Baldwin //add w24,w24,w17 // h+=Sigma0(a) 201bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 202bc3d5698SJohn Baldwin rev w7,w7 
// 4 203bc3d5698SJohn Baldwin#endif 204bc3d5698SJohn Baldwin add w24,w24,w17 // h+=Sigma0(a) 205bc3d5698SJohn Baldwin ror w16,w20,#6 206bc3d5698SJohn Baldwin add w23,w23,w19 // h+=K[i] 207bc3d5698SJohn Baldwin eor w10,w20,w20,ror#14 208bc3d5698SJohn Baldwin and w17,w21,w20 209bc3d5698SJohn Baldwin bic w19,w22,w20 210bc3d5698SJohn Baldwin add w23,w23,w7 // h+=X[i] 211bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 212bc3d5698SJohn Baldwin eor w19,w24,w25 // a^b, b^c in next round 213bc3d5698SJohn Baldwin eor w16,w16,w10,ror#11 // Sigma1(e) 214bc3d5698SJohn Baldwin ror w10,w24,#2 215bc3d5698SJohn Baldwin add w23,w23,w17 // h+=Ch(e,f,g) 216bc3d5698SJohn Baldwin eor w17,w24,w24,ror#9 217bc3d5698SJohn Baldwin add w23,w23,w16 // h+=Sigma1(e) 218bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 219bc3d5698SJohn Baldwin add w27,w27,w23 // d+=h 220bc3d5698SJohn Baldwin eor w28,w28,w25 // Maj(a,b,c) 221bc3d5698SJohn Baldwin eor w17,w10,w17,ror#13 // Sigma0(a) 222bc3d5698SJohn Baldwin add w23,w23,w28 // h+=Maj(a,b,c) 223bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 224bc3d5698SJohn Baldwin //add w23,w23,w17 // h+=Sigma0(a) 225bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 226bc3d5698SJohn Baldwin rev w8,w8 // 5 227bc3d5698SJohn Baldwin#endif 228bc3d5698SJohn Baldwin ldp w9,w10,[x1],#2*4 229bc3d5698SJohn Baldwin add w23,w23,w17 // h+=Sigma0(a) 230bc3d5698SJohn Baldwin ror w16,w27,#6 231bc3d5698SJohn Baldwin add w22,w22,w28 // h+=K[i] 232bc3d5698SJohn Baldwin eor w11,w27,w27,ror#14 233bc3d5698SJohn Baldwin and w17,w20,w27 234bc3d5698SJohn Baldwin bic w28,w21,w27 235bc3d5698SJohn Baldwin add w22,w22,w8 // h+=X[i] 236bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 237bc3d5698SJohn Baldwin eor w28,w23,w24 // a^b, b^c in next round 238bc3d5698SJohn Baldwin eor w16,w16,w11,ror#11 // Sigma1(e) 239bc3d5698SJohn Baldwin ror w11,w23,#2 240bc3d5698SJohn Baldwin add w22,w22,w17 // h+=Ch(e,f,g) 241bc3d5698SJohn Baldwin eor w17,w23,w23,ror#9 242bc3d5698SJohn 
Baldwin add w22,w22,w16 // h+=Sigma1(e) 243bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 244bc3d5698SJohn Baldwin add w26,w26,w22 // d+=h 245bc3d5698SJohn Baldwin eor w19,w19,w24 // Maj(a,b,c) 246bc3d5698SJohn Baldwin eor w17,w11,w17,ror#13 // Sigma0(a) 247bc3d5698SJohn Baldwin add w22,w22,w19 // h+=Maj(a,b,c) 248bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 249bc3d5698SJohn Baldwin //add w22,w22,w17 // h+=Sigma0(a) 250bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 251bc3d5698SJohn Baldwin rev w9,w9 // 6 252bc3d5698SJohn Baldwin#endif 253bc3d5698SJohn Baldwin add w22,w22,w17 // h+=Sigma0(a) 254bc3d5698SJohn Baldwin ror w16,w26,#6 255bc3d5698SJohn Baldwin add w21,w21,w19 // h+=K[i] 256bc3d5698SJohn Baldwin eor w12,w26,w26,ror#14 257bc3d5698SJohn Baldwin and w17,w27,w26 258bc3d5698SJohn Baldwin bic w19,w20,w26 259bc3d5698SJohn Baldwin add w21,w21,w9 // h+=X[i] 260bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 261bc3d5698SJohn Baldwin eor w19,w22,w23 // a^b, b^c in next round 262bc3d5698SJohn Baldwin eor w16,w16,w12,ror#11 // Sigma1(e) 263bc3d5698SJohn Baldwin ror w12,w22,#2 264bc3d5698SJohn Baldwin add w21,w21,w17 // h+=Ch(e,f,g) 265bc3d5698SJohn Baldwin eor w17,w22,w22,ror#9 266bc3d5698SJohn Baldwin add w21,w21,w16 // h+=Sigma1(e) 267bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 268bc3d5698SJohn Baldwin add w25,w25,w21 // d+=h 269bc3d5698SJohn Baldwin eor w28,w28,w23 // Maj(a,b,c) 270bc3d5698SJohn Baldwin eor w17,w12,w17,ror#13 // Sigma0(a) 271bc3d5698SJohn Baldwin add w21,w21,w28 // h+=Maj(a,b,c) 272bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 273bc3d5698SJohn Baldwin //add w21,w21,w17 // h+=Sigma0(a) 274bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 275bc3d5698SJohn Baldwin rev w10,w10 // 7 276bc3d5698SJohn Baldwin#endif 277bc3d5698SJohn Baldwin ldp w11,w12,[x1],#2*4 278bc3d5698SJohn Baldwin add w21,w21,w17 // h+=Sigma0(a) 279bc3d5698SJohn Baldwin ror w16,w25,#6 280bc3d5698SJohn Baldwin add w20,w20,w28 // 
h+=K[i] 281bc3d5698SJohn Baldwin eor w13,w25,w25,ror#14 282bc3d5698SJohn Baldwin and w17,w26,w25 283bc3d5698SJohn Baldwin bic w28,w27,w25 284bc3d5698SJohn Baldwin add w20,w20,w10 // h+=X[i] 285bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 286bc3d5698SJohn Baldwin eor w28,w21,w22 // a^b, b^c in next round 287bc3d5698SJohn Baldwin eor w16,w16,w13,ror#11 // Sigma1(e) 288bc3d5698SJohn Baldwin ror w13,w21,#2 289bc3d5698SJohn Baldwin add w20,w20,w17 // h+=Ch(e,f,g) 290bc3d5698SJohn Baldwin eor w17,w21,w21,ror#9 291bc3d5698SJohn Baldwin add w20,w20,w16 // h+=Sigma1(e) 292bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 293bc3d5698SJohn Baldwin add w24,w24,w20 // d+=h 294bc3d5698SJohn Baldwin eor w19,w19,w22 // Maj(a,b,c) 295bc3d5698SJohn Baldwin eor w17,w13,w17,ror#13 // Sigma0(a) 296bc3d5698SJohn Baldwin add w20,w20,w19 // h+=Maj(a,b,c) 297bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 298bc3d5698SJohn Baldwin //add w20,w20,w17 // h+=Sigma0(a) 299bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 300bc3d5698SJohn Baldwin rev w11,w11 // 8 301bc3d5698SJohn Baldwin#endif 302bc3d5698SJohn Baldwin add w20,w20,w17 // h+=Sigma0(a) 303bc3d5698SJohn Baldwin ror w16,w24,#6 304bc3d5698SJohn Baldwin add w27,w27,w19 // h+=K[i] 305bc3d5698SJohn Baldwin eor w14,w24,w24,ror#14 306bc3d5698SJohn Baldwin and w17,w25,w24 307bc3d5698SJohn Baldwin bic w19,w26,w24 308bc3d5698SJohn Baldwin add w27,w27,w11 // h+=X[i] 309bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 310bc3d5698SJohn Baldwin eor w19,w20,w21 // a^b, b^c in next round 311bc3d5698SJohn Baldwin eor w16,w16,w14,ror#11 // Sigma1(e) 312bc3d5698SJohn Baldwin ror w14,w20,#2 313bc3d5698SJohn Baldwin add w27,w27,w17 // h+=Ch(e,f,g) 314bc3d5698SJohn Baldwin eor w17,w20,w20,ror#9 315bc3d5698SJohn Baldwin add w27,w27,w16 // h+=Sigma1(e) 316bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 317bc3d5698SJohn Baldwin add w23,w23,w27 // d+=h 318bc3d5698SJohn Baldwin eor w28,w28,w21 // Maj(a,b,c) 319bc3d5698SJohn Baldwin 
eor w17,w14,w17,ror#13 // Sigma0(a) 320bc3d5698SJohn Baldwin add w27,w27,w28 // h+=Maj(a,b,c) 321bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 322bc3d5698SJohn Baldwin //add w27,w27,w17 // h+=Sigma0(a) 323bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 324bc3d5698SJohn Baldwin rev w12,w12 // 9 325bc3d5698SJohn Baldwin#endif 326bc3d5698SJohn Baldwin ldp w13,w14,[x1],#2*4 327bc3d5698SJohn Baldwin add w27,w27,w17 // h+=Sigma0(a) 328bc3d5698SJohn Baldwin ror w16,w23,#6 329bc3d5698SJohn Baldwin add w26,w26,w28 // h+=K[i] 330bc3d5698SJohn Baldwin eor w15,w23,w23,ror#14 331bc3d5698SJohn Baldwin and w17,w24,w23 332bc3d5698SJohn Baldwin bic w28,w25,w23 333bc3d5698SJohn Baldwin add w26,w26,w12 // h+=X[i] 334bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 335bc3d5698SJohn Baldwin eor w28,w27,w20 // a^b, b^c in next round 336bc3d5698SJohn Baldwin eor w16,w16,w15,ror#11 // Sigma1(e) 337bc3d5698SJohn Baldwin ror w15,w27,#2 338bc3d5698SJohn Baldwin add w26,w26,w17 // h+=Ch(e,f,g) 339bc3d5698SJohn Baldwin eor w17,w27,w27,ror#9 340bc3d5698SJohn Baldwin add w26,w26,w16 // h+=Sigma1(e) 341bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 342bc3d5698SJohn Baldwin add w22,w22,w26 // d+=h 343bc3d5698SJohn Baldwin eor w19,w19,w20 // Maj(a,b,c) 344bc3d5698SJohn Baldwin eor w17,w15,w17,ror#13 // Sigma0(a) 345bc3d5698SJohn Baldwin add w26,w26,w19 // h+=Maj(a,b,c) 346bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 347bc3d5698SJohn Baldwin //add w26,w26,w17 // h+=Sigma0(a) 348bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 349bc3d5698SJohn Baldwin rev w13,w13 // 10 350bc3d5698SJohn Baldwin#endif 351bc3d5698SJohn Baldwin add w26,w26,w17 // h+=Sigma0(a) 352bc3d5698SJohn Baldwin ror w16,w22,#6 353bc3d5698SJohn Baldwin add w25,w25,w19 // h+=K[i] 354bc3d5698SJohn Baldwin eor w0,w22,w22,ror#14 355bc3d5698SJohn Baldwin and w17,w23,w22 356bc3d5698SJohn Baldwin bic w19,w24,w22 357bc3d5698SJohn Baldwin add w25,w25,w13 // h+=X[i] 358bc3d5698SJohn Baldwin orr 
w17,w17,w19 // Ch(e,f,g) 359bc3d5698SJohn Baldwin eor w19,w26,w27 // a^b, b^c in next round 360bc3d5698SJohn Baldwin eor w16,w16,w0,ror#11 // Sigma1(e) 361bc3d5698SJohn Baldwin ror w0,w26,#2 362bc3d5698SJohn Baldwin add w25,w25,w17 // h+=Ch(e,f,g) 363bc3d5698SJohn Baldwin eor w17,w26,w26,ror#9 364bc3d5698SJohn Baldwin add w25,w25,w16 // h+=Sigma1(e) 365bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 366bc3d5698SJohn Baldwin add w21,w21,w25 // d+=h 367bc3d5698SJohn Baldwin eor w28,w28,w27 // Maj(a,b,c) 368bc3d5698SJohn Baldwin eor w17,w0,w17,ror#13 // Sigma0(a) 369bc3d5698SJohn Baldwin add w25,w25,w28 // h+=Maj(a,b,c) 370bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 371bc3d5698SJohn Baldwin //add w25,w25,w17 // h+=Sigma0(a) 372bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 373bc3d5698SJohn Baldwin rev w14,w14 // 11 374bc3d5698SJohn Baldwin#endif 375bc3d5698SJohn Baldwin ldp w15,w0,[x1],#2*4 376bc3d5698SJohn Baldwin add w25,w25,w17 // h+=Sigma0(a) 377bc3d5698SJohn Baldwin str w6,[sp,#12] 378bc3d5698SJohn Baldwin ror w16,w21,#6 379bc3d5698SJohn Baldwin add w24,w24,w28 // h+=K[i] 380bc3d5698SJohn Baldwin eor w6,w21,w21,ror#14 381bc3d5698SJohn Baldwin and w17,w22,w21 382bc3d5698SJohn Baldwin bic w28,w23,w21 383bc3d5698SJohn Baldwin add w24,w24,w14 // h+=X[i] 384bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 385bc3d5698SJohn Baldwin eor w28,w25,w26 // a^b, b^c in next round 386bc3d5698SJohn Baldwin eor w16,w16,w6,ror#11 // Sigma1(e) 387bc3d5698SJohn Baldwin ror w6,w25,#2 388bc3d5698SJohn Baldwin add w24,w24,w17 // h+=Ch(e,f,g) 389bc3d5698SJohn Baldwin eor w17,w25,w25,ror#9 390bc3d5698SJohn Baldwin add w24,w24,w16 // h+=Sigma1(e) 391bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 392bc3d5698SJohn Baldwin add w20,w20,w24 // d+=h 393bc3d5698SJohn Baldwin eor w19,w19,w26 // Maj(a,b,c) 394bc3d5698SJohn Baldwin eor w17,w6,w17,ror#13 // Sigma0(a) 395bc3d5698SJohn Baldwin add w24,w24,w19 // h+=Maj(a,b,c) 396bc3d5698SJohn Baldwin ldr w19,[x30],#4 
// *K++, w28 in next round 397bc3d5698SJohn Baldwin //add w24,w24,w17 // h+=Sigma0(a) 398bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 399bc3d5698SJohn Baldwin rev w15,w15 // 12 400bc3d5698SJohn Baldwin#endif 401bc3d5698SJohn Baldwin add w24,w24,w17 // h+=Sigma0(a) 402bc3d5698SJohn Baldwin str w7,[sp,#0] 403bc3d5698SJohn Baldwin ror w16,w20,#6 404bc3d5698SJohn Baldwin add w23,w23,w19 // h+=K[i] 405bc3d5698SJohn Baldwin eor w7,w20,w20,ror#14 406bc3d5698SJohn Baldwin and w17,w21,w20 407bc3d5698SJohn Baldwin bic w19,w22,w20 408bc3d5698SJohn Baldwin add w23,w23,w15 // h+=X[i] 409bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 410bc3d5698SJohn Baldwin eor w19,w24,w25 // a^b, b^c in next round 411bc3d5698SJohn Baldwin eor w16,w16,w7,ror#11 // Sigma1(e) 412bc3d5698SJohn Baldwin ror w7,w24,#2 413bc3d5698SJohn Baldwin add w23,w23,w17 // h+=Ch(e,f,g) 414bc3d5698SJohn Baldwin eor w17,w24,w24,ror#9 415bc3d5698SJohn Baldwin add w23,w23,w16 // h+=Sigma1(e) 416bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 417bc3d5698SJohn Baldwin add w27,w27,w23 // d+=h 418bc3d5698SJohn Baldwin eor w28,w28,w25 // Maj(a,b,c) 419bc3d5698SJohn Baldwin eor w17,w7,w17,ror#13 // Sigma0(a) 420bc3d5698SJohn Baldwin add w23,w23,w28 // h+=Maj(a,b,c) 421bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 422bc3d5698SJohn Baldwin //add w23,w23,w17 // h+=Sigma0(a) 423bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 424bc3d5698SJohn Baldwin rev w0,w0 // 13 425bc3d5698SJohn Baldwin#endif 426bc3d5698SJohn Baldwin ldp w1,w2,[x1] 427bc3d5698SJohn Baldwin add w23,w23,w17 // h+=Sigma0(a) 428bc3d5698SJohn Baldwin str w8,[sp,#4] 429bc3d5698SJohn Baldwin ror w16,w27,#6 430bc3d5698SJohn Baldwin add w22,w22,w28 // h+=K[i] 431bc3d5698SJohn Baldwin eor w8,w27,w27,ror#14 432bc3d5698SJohn Baldwin and w17,w20,w27 433bc3d5698SJohn Baldwin bic w28,w21,w27 434bc3d5698SJohn Baldwin add w22,w22,w0 // h+=X[i] 435bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 436bc3d5698SJohn Baldwin eor w28,w23,w24 // a^b, b^c 
in next round 437bc3d5698SJohn Baldwin eor w16,w16,w8,ror#11 // Sigma1(e) 438bc3d5698SJohn Baldwin ror w8,w23,#2 439bc3d5698SJohn Baldwin add w22,w22,w17 // h+=Ch(e,f,g) 440bc3d5698SJohn Baldwin eor w17,w23,w23,ror#9 441bc3d5698SJohn Baldwin add w22,w22,w16 // h+=Sigma1(e) 442bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 443bc3d5698SJohn Baldwin add w26,w26,w22 // d+=h 444bc3d5698SJohn Baldwin eor w19,w19,w24 // Maj(a,b,c) 445bc3d5698SJohn Baldwin eor w17,w8,w17,ror#13 // Sigma0(a) 446bc3d5698SJohn Baldwin add w22,w22,w19 // h+=Maj(a,b,c) 447bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 448bc3d5698SJohn Baldwin //add w22,w22,w17 // h+=Sigma0(a) 449bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 450bc3d5698SJohn Baldwin rev w1,w1 // 14 451bc3d5698SJohn Baldwin#endif 452bc3d5698SJohn Baldwin ldr w6,[sp,#12] 453bc3d5698SJohn Baldwin add w22,w22,w17 // h+=Sigma0(a) 454bc3d5698SJohn Baldwin str w9,[sp,#8] 455bc3d5698SJohn Baldwin ror w16,w26,#6 456bc3d5698SJohn Baldwin add w21,w21,w19 // h+=K[i] 457bc3d5698SJohn Baldwin eor w9,w26,w26,ror#14 458bc3d5698SJohn Baldwin and w17,w27,w26 459bc3d5698SJohn Baldwin bic w19,w20,w26 460bc3d5698SJohn Baldwin add w21,w21,w1 // h+=X[i] 461bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 462bc3d5698SJohn Baldwin eor w19,w22,w23 // a^b, b^c in next round 463bc3d5698SJohn Baldwin eor w16,w16,w9,ror#11 // Sigma1(e) 464bc3d5698SJohn Baldwin ror w9,w22,#2 465bc3d5698SJohn Baldwin add w21,w21,w17 // h+=Ch(e,f,g) 466bc3d5698SJohn Baldwin eor w17,w22,w22,ror#9 467bc3d5698SJohn Baldwin add w21,w21,w16 // h+=Sigma1(e) 468bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 469bc3d5698SJohn Baldwin add w25,w25,w21 // d+=h 470bc3d5698SJohn Baldwin eor w28,w28,w23 // Maj(a,b,c) 471bc3d5698SJohn Baldwin eor w17,w9,w17,ror#13 // Sigma0(a) 472bc3d5698SJohn Baldwin add w21,w21,w28 // h+=Maj(a,b,c) 473bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 474bc3d5698SJohn Baldwin //add w21,w21,w17 // h+=Sigma0(a) 
475bc3d5698SJohn Baldwin#ifndef __AARCH64EB__ 476bc3d5698SJohn Baldwin rev w2,w2 // 15 477bc3d5698SJohn Baldwin#endif 478bc3d5698SJohn Baldwin ldr w7,[sp,#0] 479bc3d5698SJohn Baldwin add w21,w21,w17 // h+=Sigma0(a) 480bc3d5698SJohn Baldwin str w10,[sp,#12] 481bc3d5698SJohn Baldwin ror w16,w25,#6 482bc3d5698SJohn Baldwin add w20,w20,w28 // h+=K[i] 483bc3d5698SJohn Baldwin ror w9,w4,#7 484bc3d5698SJohn Baldwin and w17,w26,w25 485bc3d5698SJohn Baldwin ror w8,w1,#17 486bc3d5698SJohn Baldwin bic w28,w27,w25 487bc3d5698SJohn Baldwin ror w10,w21,#2 488bc3d5698SJohn Baldwin add w20,w20,w2 // h+=X[i] 489bc3d5698SJohn Baldwin eor w16,w16,w25,ror#11 490bc3d5698SJohn Baldwin eor w9,w9,w4,ror#18 491bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 492bc3d5698SJohn Baldwin eor w28,w21,w22 // a^b, b^c in next round 493bc3d5698SJohn Baldwin eor w16,w16,w25,ror#25 // Sigma1(e) 494bc3d5698SJohn Baldwin eor w10,w10,w21,ror#13 495bc3d5698SJohn Baldwin add w20,w20,w17 // h+=Ch(e,f,g) 496bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 497bc3d5698SJohn Baldwin eor w8,w8,w1,ror#19 498bc3d5698SJohn Baldwin eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) 499bc3d5698SJohn Baldwin add w20,w20,w16 // h+=Sigma1(e) 500bc3d5698SJohn Baldwin eor w19,w19,w22 // Maj(a,b,c) 501bc3d5698SJohn Baldwin eor w17,w10,w21,ror#22 // Sigma0(a) 502bc3d5698SJohn Baldwin eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) 503bc3d5698SJohn Baldwin add w3,w3,w12 504bc3d5698SJohn Baldwin add w24,w24,w20 // d+=h 505bc3d5698SJohn Baldwin add w20,w20,w19 // h+=Maj(a,b,c) 506bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 507bc3d5698SJohn Baldwin add w3,w3,w9 508bc3d5698SJohn Baldwin add w20,w20,w17 // h+=Sigma0(a) 509bc3d5698SJohn Baldwin add w3,w3,w8 510bc3d5698SJohn Baldwin.Loop_16_xx: 511bc3d5698SJohn Baldwin ldr w8,[sp,#4] 512bc3d5698SJohn Baldwin str w11,[sp,#0] 513bc3d5698SJohn Baldwin ror w16,w24,#6 514bc3d5698SJohn Baldwin add w27,w27,w19 // h+=K[i] 515bc3d5698SJohn Baldwin ror w10,w5,#7 516bc3d5698SJohn 
Baldwin and w17,w25,w24 517bc3d5698SJohn Baldwin ror w9,w2,#17 518bc3d5698SJohn Baldwin bic w19,w26,w24 519bc3d5698SJohn Baldwin ror w11,w20,#2 520bc3d5698SJohn Baldwin add w27,w27,w3 // h+=X[i] 521bc3d5698SJohn Baldwin eor w16,w16,w24,ror#11 522bc3d5698SJohn Baldwin eor w10,w10,w5,ror#18 523bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 524bc3d5698SJohn Baldwin eor w19,w20,w21 // a^b, b^c in next round 525bc3d5698SJohn Baldwin eor w16,w16,w24,ror#25 // Sigma1(e) 526bc3d5698SJohn Baldwin eor w11,w11,w20,ror#13 527bc3d5698SJohn Baldwin add w27,w27,w17 // h+=Ch(e,f,g) 528bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 529bc3d5698SJohn Baldwin eor w9,w9,w2,ror#19 530bc3d5698SJohn Baldwin eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) 531bc3d5698SJohn Baldwin add w27,w27,w16 // h+=Sigma1(e) 532bc3d5698SJohn Baldwin eor w28,w28,w21 // Maj(a,b,c) 533bc3d5698SJohn Baldwin eor w17,w11,w20,ror#22 // Sigma0(a) 534bc3d5698SJohn Baldwin eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) 535bc3d5698SJohn Baldwin add w4,w4,w13 536bc3d5698SJohn Baldwin add w23,w23,w27 // d+=h 537bc3d5698SJohn Baldwin add w27,w27,w28 // h+=Maj(a,b,c) 538bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 539bc3d5698SJohn Baldwin add w4,w4,w10 540bc3d5698SJohn Baldwin add w27,w27,w17 // h+=Sigma0(a) 541bc3d5698SJohn Baldwin add w4,w4,w9 542bc3d5698SJohn Baldwin ldr w9,[sp,#8] 543bc3d5698SJohn Baldwin str w12,[sp,#4] 544bc3d5698SJohn Baldwin ror w16,w23,#6 545bc3d5698SJohn Baldwin add w26,w26,w28 // h+=K[i] 546bc3d5698SJohn Baldwin ror w11,w6,#7 547bc3d5698SJohn Baldwin and w17,w24,w23 548bc3d5698SJohn Baldwin ror w10,w3,#17 549bc3d5698SJohn Baldwin bic w28,w25,w23 550bc3d5698SJohn Baldwin ror w12,w27,#2 551bc3d5698SJohn Baldwin add w26,w26,w4 // h+=X[i] 552bc3d5698SJohn Baldwin eor w16,w16,w23,ror#11 553bc3d5698SJohn Baldwin eor w11,w11,w6,ror#18 554bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 555bc3d5698SJohn Baldwin eor w28,w27,w20 // a^b, b^c in next round 556bc3d5698SJohn Baldwin eor 
w16,w16,w23,ror#25 // Sigma1(e) 557bc3d5698SJohn Baldwin eor w12,w12,w27,ror#13 558bc3d5698SJohn Baldwin add w26,w26,w17 // h+=Ch(e,f,g) 559bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 560bc3d5698SJohn Baldwin eor w10,w10,w3,ror#19 561bc3d5698SJohn Baldwin eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) 562bc3d5698SJohn Baldwin add w26,w26,w16 // h+=Sigma1(e) 563bc3d5698SJohn Baldwin eor w19,w19,w20 // Maj(a,b,c) 564bc3d5698SJohn Baldwin eor w17,w12,w27,ror#22 // Sigma0(a) 565bc3d5698SJohn Baldwin eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) 566bc3d5698SJohn Baldwin add w5,w5,w14 567bc3d5698SJohn Baldwin add w22,w22,w26 // d+=h 568bc3d5698SJohn Baldwin add w26,w26,w19 // h+=Maj(a,b,c) 569bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 570bc3d5698SJohn Baldwin add w5,w5,w11 571bc3d5698SJohn Baldwin add w26,w26,w17 // h+=Sigma0(a) 572bc3d5698SJohn Baldwin add w5,w5,w10 573bc3d5698SJohn Baldwin ldr w10,[sp,#12] 574bc3d5698SJohn Baldwin str w13,[sp,#8] 575bc3d5698SJohn Baldwin ror w16,w22,#6 576bc3d5698SJohn Baldwin add w25,w25,w19 // h+=K[i] 577bc3d5698SJohn Baldwin ror w12,w7,#7 578bc3d5698SJohn Baldwin and w17,w23,w22 579bc3d5698SJohn Baldwin ror w11,w4,#17 580bc3d5698SJohn Baldwin bic w19,w24,w22 581bc3d5698SJohn Baldwin ror w13,w26,#2 582bc3d5698SJohn Baldwin add w25,w25,w5 // h+=X[i] 583bc3d5698SJohn Baldwin eor w16,w16,w22,ror#11 584bc3d5698SJohn Baldwin eor w12,w12,w7,ror#18 585bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 586bc3d5698SJohn Baldwin eor w19,w26,w27 // a^b, b^c in next round 587bc3d5698SJohn Baldwin eor w16,w16,w22,ror#25 // Sigma1(e) 588bc3d5698SJohn Baldwin eor w13,w13,w26,ror#13 589bc3d5698SJohn Baldwin add w25,w25,w17 // h+=Ch(e,f,g) 590bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 591bc3d5698SJohn Baldwin eor w11,w11,w4,ror#19 592bc3d5698SJohn Baldwin eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) 593bc3d5698SJohn Baldwin add w25,w25,w16 // h+=Sigma1(e) 594bc3d5698SJohn Baldwin eor w28,w28,w27 // Maj(a,b,c) 595bc3d5698SJohn 
Baldwin eor w17,w13,w26,ror#22 // Sigma0(a) 596bc3d5698SJohn Baldwin eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) 597bc3d5698SJohn Baldwin add w6,w6,w15 598bc3d5698SJohn Baldwin add w21,w21,w25 // d+=h 599bc3d5698SJohn Baldwin add w25,w25,w28 // h+=Maj(a,b,c) 600bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 601bc3d5698SJohn Baldwin add w6,w6,w12 602bc3d5698SJohn Baldwin add w25,w25,w17 // h+=Sigma0(a) 603bc3d5698SJohn Baldwin add w6,w6,w11 604bc3d5698SJohn Baldwin ldr w11,[sp,#0] 605bc3d5698SJohn Baldwin str w14,[sp,#12] 606bc3d5698SJohn Baldwin ror w16,w21,#6 607bc3d5698SJohn Baldwin add w24,w24,w28 // h+=K[i] 608bc3d5698SJohn Baldwin ror w13,w8,#7 609bc3d5698SJohn Baldwin and w17,w22,w21 610bc3d5698SJohn Baldwin ror w12,w5,#17 611bc3d5698SJohn Baldwin bic w28,w23,w21 612bc3d5698SJohn Baldwin ror w14,w25,#2 613bc3d5698SJohn Baldwin add w24,w24,w6 // h+=X[i] 614bc3d5698SJohn Baldwin eor w16,w16,w21,ror#11 615bc3d5698SJohn Baldwin eor w13,w13,w8,ror#18 616bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 617bc3d5698SJohn Baldwin eor w28,w25,w26 // a^b, b^c in next round 618bc3d5698SJohn Baldwin eor w16,w16,w21,ror#25 // Sigma1(e) 619bc3d5698SJohn Baldwin eor w14,w14,w25,ror#13 620bc3d5698SJohn Baldwin add w24,w24,w17 // h+=Ch(e,f,g) 621bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 622bc3d5698SJohn Baldwin eor w12,w12,w5,ror#19 623bc3d5698SJohn Baldwin eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) 624bc3d5698SJohn Baldwin add w24,w24,w16 // h+=Sigma1(e) 625bc3d5698SJohn Baldwin eor w19,w19,w26 // Maj(a,b,c) 626bc3d5698SJohn Baldwin eor w17,w14,w25,ror#22 // Sigma0(a) 627bc3d5698SJohn Baldwin eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) 628bc3d5698SJohn Baldwin add w7,w7,w0 629bc3d5698SJohn Baldwin add w20,w20,w24 // d+=h 630bc3d5698SJohn Baldwin add w24,w24,w19 // h+=Maj(a,b,c) 631bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 632bc3d5698SJohn Baldwin add w7,w7,w13 633bc3d5698SJohn Baldwin add w24,w24,w17 // h+=Sigma0(a) 
634bc3d5698SJohn Baldwin add w7,w7,w12 635bc3d5698SJohn Baldwin ldr w12,[sp,#4] 636bc3d5698SJohn Baldwin str w15,[sp,#0] 637bc3d5698SJohn Baldwin ror w16,w20,#6 638bc3d5698SJohn Baldwin add w23,w23,w19 // h+=K[i] 639bc3d5698SJohn Baldwin ror w14,w9,#7 640bc3d5698SJohn Baldwin and w17,w21,w20 641bc3d5698SJohn Baldwin ror w13,w6,#17 642bc3d5698SJohn Baldwin bic w19,w22,w20 643bc3d5698SJohn Baldwin ror w15,w24,#2 644bc3d5698SJohn Baldwin add w23,w23,w7 // h+=X[i] 645bc3d5698SJohn Baldwin eor w16,w16,w20,ror#11 646bc3d5698SJohn Baldwin eor w14,w14,w9,ror#18 647bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 648bc3d5698SJohn Baldwin eor w19,w24,w25 // a^b, b^c in next round 649bc3d5698SJohn Baldwin eor w16,w16,w20,ror#25 // Sigma1(e) 650bc3d5698SJohn Baldwin eor w15,w15,w24,ror#13 651bc3d5698SJohn Baldwin add w23,w23,w17 // h+=Ch(e,f,g) 652bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 653bc3d5698SJohn Baldwin eor w13,w13,w6,ror#19 654bc3d5698SJohn Baldwin eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) 655bc3d5698SJohn Baldwin add w23,w23,w16 // h+=Sigma1(e) 656bc3d5698SJohn Baldwin eor w28,w28,w25 // Maj(a,b,c) 657bc3d5698SJohn Baldwin eor w17,w15,w24,ror#22 // Sigma0(a) 658bc3d5698SJohn Baldwin eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) 659bc3d5698SJohn Baldwin add w8,w8,w1 660bc3d5698SJohn Baldwin add w27,w27,w23 // d+=h 661bc3d5698SJohn Baldwin add w23,w23,w28 // h+=Maj(a,b,c) 662bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 663bc3d5698SJohn Baldwin add w8,w8,w14 664bc3d5698SJohn Baldwin add w23,w23,w17 // h+=Sigma0(a) 665bc3d5698SJohn Baldwin add w8,w8,w13 666bc3d5698SJohn Baldwin ldr w13,[sp,#8] 667bc3d5698SJohn Baldwin str w0,[sp,#4] 668bc3d5698SJohn Baldwin ror w16,w27,#6 669bc3d5698SJohn Baldwin add w22,w22,w28 // h+=K[i] 670bc3d5698SJohn Baldwin ror w15,w10,#7 671bc3d5698SJohn Baldwin and w17,w20,w27 672bc3d5698SJohn Baldwin ror w14,w7,#17 673bc3d5698SJohn Baldwin bic w28,w21,w27 674bc3d5698SJohn Baldwin ror w0,w23,#2 675bc3d5698SJohn 
Baldwin add w22,w22,w8 // h+=X[i] 676bc3d5698SJohn Baldwin eor w16,w16,w27,ror#11 677bc3d5698SJohn Baldwin eor w15,w15,w10,ror#18 678bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 679bc3d5698SJohn Baldwin eor w28,w23,w24 // a^b, b^c in next round 680bc3d5698SJohn Baldwin eor w16,w16,w27,ror#25 // Sigma1(e) 681bc3d5698SJohn Baldwin eor w0,w0,w23,ror#13 682bc3d5698SJohn Baldwin add w22,w22,w17 // h+=Ch(e,f,g) 683bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 684bc3d5698SJohn Baldwin eor w14,w14,w7,ror#19 685bc3d5698SJohn Baldwin eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) 686bc3d5698SJohn Baldwin add w22,w22,w16 // h+=Sigma1(e) 687bc3d5698SJohn Baldwin eor w19,w19,w24 // Maj(a,b,c) 688bc3d5698SJohn Baldwin eor w17,w0,w23,ror#22 // Sigma0(a) 689bc3d5698SJohn Baldwin eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) 690bc3d5698SJohn Baldwin add w9,w9,w2 691bc3d5698SJohn Baldwin add w26,w26,w22 // d+=h 692bc3d5698SJohn Baldwin add w22,w22,w19 // h+=Maj(a,b,c) 693bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 694bc3d5698SJohn Baldwin add w9,w9,w15 695bc3d5698SJohn Baldwin add w22,w22,w17 // h+=Sigma0(a) 696bc3d5698SJohn Baldwin add w9,w9,w14 697bc3d5698SJohn Baldwin ldr w14,[sp,#12] 698bc3d5698SJohn Baldwin str w1,[sp,#8] 699bc3d5698SJohn Baldwin ror w16,w26,#6 700bc3d5698SJohn Baldwin add w21,w21,w19 // h+=K[i] 701bc3d5698SJohn Baldwin ror w0,w11,#7 702bc3d5698SJohn Baldwin and w17,w27,w26 703bc3d5698SJohn Baldwin ror w15,w8,#17 704bc3d5698SJohn Baldwin bic w19,w20,w26 705bc3d5698SJohn Baldwin ror w1,w22,#2 706bc3d5698SJohn Baldwin add w21,w21,w9 // h+=X[i] 707bc3d5698SJohn Baldwin eor w16,w16,w26,ror#11 708bc3d5698SJohn Baldwin eor w0,w0,w11,ror#18 709bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 710bc3d5698SJohn Baldwin eor w19,w22,w23 // a^b, b^c in next round 711bc3d5698SJohn Baldwin eor w16,w16,w26,ror#25 // Sigma1(e) 712bc3d5698SJohn Baldwin eor w1,w1,w22,ror#13 713bc3d5698SJohn Baldwin add w21,w21,w17 // h+=Ch(e,f,g) 714bc3d5698SJohn Baldwin 
and w28,w28,w19 // (b^c)&=(a^b) 715bc3d5698SJohn Baldwin eor w15,w15,w8,ror#19 716bc3d5698SJohn Baldwin eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) 717bc3d5698SJohn Baldwin add w21,w21,w16 // h+=Sigma1(e) 718bc3d5698SJohn Baldwin eor w28,w28,w23 // Maj(a,b,c) 719bc3d5698SJohn Baldwin eor w17,w1,w22,ror#22 // Sigma0(a) 720bc3d5698SJohn Baldwin eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) 721bc3d5698SJohn Baldwin add w10,w10,w3 722bc3d5698SJohn Baldwin add w25,w25,w21 // d+=h 723bc3d5698SJohn Baldwin add w21,w21,w28 // h+=Maj(a,b,c) 724bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 725bc3d5698SJohn Baldwin add w10,w10,w0 726bc3d5698SJohn Baldwin add w21,w21,w17 // h+=Sigma0(a) 727bc3d5698SJohn Baldwin add w10,w10,w15 728bc3d5698SJohn Baldwin ldr w15,[sp,#0] 729bc3d5698SJohn Baldwin str w2,[sp,#12] 730bc3d5698SJohn Baldwin ror w16,w25,#6 731bc3d5698SJohn Baldwin add w20,w20,w28 // h+=K[i] 732bc3d5698SJohn Baldwin ror w1,w12,#7 733bc3d5698SJohn Baldwin and w17,w26,w25 734bc3d5698SJohn Baldwin ror w0,w9,#17 735bc3d5698SJohn Baldwin bic w28,w27,w25 736bc3d5698SJohn Baldwin ror w2,w21,#2 737bc3d5698SJohn Baldwin add w20,w20,w10 // h+=X[i] 738bc3d5698SJohn Baldwin eor w16,w16,w25,ror#11 739bc3d5698SJohn Baldwin eor w1,w1,w12,ror#18 740bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 741bc3d5698SJohn Baldwin eor w28,w21,w22 // a^b, b^c in next round 742bc3d5698SJohn Baldwin eor w16,w16,w25,ror#25 // Sigma1(e) 743bc3d5698SJohn Baldwin eor w2,w2,w21,ror#13 744bc3d5698SJohn Baldwin add w20,w20,w17 // h+=Ch(e,f,g) 745bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 746bc3d5698SJohn Baldwin eor w0,w0,w9,ror#19 747bc3d5698SJohn Baldwin eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) 748bc3d5698SJohn Baldwin add w20,w20,w16 // h+=Sigma1(e) 749bc3d5698SJohn Baldwin eor w19,w19,w22 // Maj(a,b,c) 750bc3d5698SJohn Baldwin eor w17,w2,w21,ror#22 // Sigma0(a) 751bc3d5698SJohn Baldwin eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) 752bc3d5698SJohn Baldwin add w11,w11,w4 753bc3d5698SJohn 
Baldwin add w24,w24,w20 // d+=h 754bc3d5698SJohn Baldwin add w20,w20,w19 // h+=Maj(a,b,c) 755bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 756bc3d5698SJohn Baldwin add w11,w11,w1 757bc3d5698SJohn Baldwin add w20,w20,w17 // h+=Sigma0(a) 758bc3d5698SJohn Baldwin add w11,w11,w0 759bc3d5698SJohn Baldwin ldr w0,[sp,#4] 760bc3d5698SJohn Baldwin str w3,[sp,#0] 761bc3d5698SJohn Baldwin ror w16,w24,#6 762bc3d5698SJohn Baldwin add w27,w27,w19 // h+=K[i] 763bc3d5698SJohn Baldwin ror w2,w13,#7 764bc3d5698SJohn Baldwin and w17,w25,w24 765bc3d5698SJohn Baldwin ror w1,w10,#17 766bc3d5698SJohn Baldwin bic w19,w26,w24 767bc3d5698SJohn Baldwin ror w3,w20,#2 768bc3d5698SJohn Baldwin add w27,w27,w11 // h+=X[i] 769bc3d5698SJohn Baldwin eor w16,w16,w24,ror#11 770bc3d5698SJohn Baldwin eor w2,w2,w13,ror#18 771bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 772bc3d5698SJohn Baldwin eor w19,w20,w21 // a^b, b^c in next round 773bc3d5698SJohn Baldwin eor w16,w16,w24,ror#25 // Sigma1(e) 774bc3d5698SJohn Baldwin eor w3,w3,w20,ror#13 775bc3d5698SJohn Baldwin add w27,w27,w17 // h+=Ch(e,f,g) 776bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 777bc3d5698SJohn Baldwin eor w1,w1,w10,ror#19 778bc3d5698SJohn Baldwin eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) 779bc3d5698SJohn Baldwin add w27,w27,w16 // h+=Sigma1(e) 780bc3d5698SJohn Baldwin eor w28,w28,w21 // Maj(a,b,c) 781bc3d5698SJohn Baldwin eor w17,w3,w20,ror#22 // Sigma0(a) 782bc3d5698SJohn Baldwin eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) 783bc3d5698SJohn Baldwin add w12,w12,w5 784bc3d5698SJohn Baldwin add w23,w23,w27 // d+=h 785bc3d5698SJohn Baldwin add w27,w27,w28 // h+=Maj(a,b,c) 786bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 787bc3d5698SJohn Baldwin add w12,w12,w2 788bc3d5698SJohn Baldwin add w27,w27,w17 // h+=Sigma0(a) 789bc3d5698SJohn Baldwin add w12,w12,w1 790bc3d5698SJohn Baldwin ldr w1,[sp,#8] 791bc3d5698SJohn Baldwin str w4,[sp,#4] 792bc3d5698SJohn Baldwin ror w16,w23,#6 793bc3d5698SJohn Baldwin 
add w26,w26,w28 // h+=K[i] 794bc3d5698SJohn Baldwin ror w3,w14,#7 795bc3d5698SJohn Baldwin and w17,w24,w23 796bc3d5698SJohn Baldwin ror w2,w11,#17 797bc3d5698SJohn Baldwin bic w28,w25,w23 798bc3d5698SJohn Baldwin ror w4,w27,#2 799bc3d5698SJohn Baldwin add w26,w26,w12 // h+=X[i] 800bc3d5698SJohn Baldwin eor w16,w16,w23,ror#11 801bc3d5698SJohn Baldwin eor w3,w3,w14,ror#18 802bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 803bc3d5698SJohn Baldwin eor w28,w27,w20 // a^b, b^c in next round 804bc3d5698SJohn Baldwin eor w16,w16,w23,ror#25 // Sigma1(e) 805bc3d5698SJohn Baldwin eor w4,w4,w27,ror#13 806bc3d5698SJohn Baldwin add w26,w26,w17 // h+=Ch(e,f,g) 807bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 808bc3d5698SJohn Baldwin eor w2,w2,w11,ror#19 809bc3d5698SJohn Baldwin eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) 810bc3d5698SJohn Baldwin add w26,w26,w16 // h+=Sigma1(e) 811bc3d5698SJohn Baldwin eor w19,w19,w20 // Maj(a,b,c) 812bc3d5698SJohn Baldwin eor w17,w4,w27,ror#22 // Sigma0(a) 813bc3d5698SJohn Baldwin eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) 814bc3d5698SJohn Baldwin add w13,w13,w6 815bc3d5698SJohn Baldwin add w22,w22,w26 // d+=h 816bc3d5698SJohn Baldwin add w26,w26,w19 // h+=Maj(a,b,c) 817bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 818bc3d5698SJohn Baldwin add w13,w13,w3 819bc3d5698SJohn Baldwin add w26,w26,w17 // h+=Sigma0(a) 820bc3d5698SJohn Baldwin add w13,w13,w2 821bc3d5698SJohn Baldwin ldr w2,[sp,#12] 822bc3d5698SJohn Baldwin str w5,[sp,#8] 823bc3d5698SJohn Baldwin ror w16,w22,#6 824bc3d5698SJohn Baldwin add w25,w25,w19 // h+=K[i] 825bc3d5698SJohn Baldwin ror w4,w15,#7 826bc3d5698SJohn Baldwin and w17,w23,w22 827bc3d5698SJohn Baldwin ror w3,w12,#17 828bc3d5698SJohn Baldwin bic w19,w24,w22 829bc3d5698SJohn Baldwin ror w5,w26,#2 830bc3d5698SJohn Baldwin add w25,w25,w13 // h+=X[i] 831bc3d5698SJohn Baldwin eor w16,w16,w22,ror#11 832bc3d5698SJohn Baldwin eor w4,w4,w15,ror#18 833bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 
834bc3d5698SJohn Baldwin eor w19,w26,w27 // a^b, b^c in next round 835bc3d5698SJohn Baldwin eor w16,w16,w22,ror#25 // Sigma1(e) 836bc3d5698SJohn Baldwin eor w5,w5,w26,ror#13 837bc3d5698SJohn Baldwin add w25,w25,w17 // h+=Ch(e,f,g) 838bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 839bc3d5698SJohn Baldwin eor w3,w3,w12,ror#19 840bc3d5698SJohn Baldwin eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) 841bc3d5698SJohn Baldwin add w25,w25,w16 // h+=Sigma1(e) 842bc3d5698SJohn Baldwin eor w28,w28,w27 // Maj(a,b,c) 843bc3d5698SJohn Baldwin eor w17,w5,w26,ror#22 // Sigma0(a) 844bc3d5698SJohn Baldwin eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) 845bc3d5698SJohn Baldwin add w14,w14,w7 846bc3d5698SJohn Baldwin add w21,w21,w25 // d+=h 847bc3d5698SJohn Baldwin add w25,w25,w28 // h+=Maj(a,b,c) 848bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 849bc3d5698SJohn Baldwin add w14,w14,w4 850bc3d5698SJohn Baldwin add w25,w25,w17 // h+=Sigma0(a) 851bc3d5698SJohn Baldwin add w14,w14,w3 852bc3d5698SJohn Baldwin ldr w3,[sp,#0] 853bc3d5698SJohn Baldwin str w6,[sp,#12] 854bc3d5698SJohn Baldwin ror w16,w21,#6 855bc3d5698SJohn Baldwin add w24,w24,w28 // h+=K[i] 856bc3d5698SJohn Baldwin ror w5,w0,#7 857bc3d5698SJohn Baldwin and w17,w22,w21 858bc3d5698SJohn Baldwin ror w4,w13,#17 859bc3d5698SJohn Baldwin bic w28,w23,w21 860bc3d5698SJohn Baldwin ror w6,w25,#2 861bc3d5698SJohn Baldwin add w24,w24,w14 // h+=X[i] 862bc3d5698SJohn Baldwin eor w16,w16,w21,ror#11 863bc3d5698SJohn Baldwin eor w5,w5,w0,ror#18 864bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 865bc3d5698SJohn Baldwin eor w28,w25,w26 // a^b, b^c in next round 866bc3d5698SJohn Baldwin eor w16,w16,w21,ror#25 // Sigma1(e) 867bc3d5698SJohn Baldwin eor w6,w6,w25,ror#13 868bc3d5698SJohn Baldwin add w24,w24,w17 // h+=Ch(e,f,g) 869bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 870bc3d5698SJohn Baldwin eor w4,w4,w13,ror#19 871bc3d5698SJohn Baldwin eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) 872bc3d5698SJohn Baldwin add w24,w24,w16 // 
h+=Sigma1(e) 873bc3d5698SJohn Baldwin eor w19,w19,w26 // Maj(a,b,c) 874bc3d5698SJohn Baldwin eor w17,w6,w25,ror#22 // Sigma0(a) 875bc3d5698SJohn Baldwin eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) 876bc3d5698SJohn Baldwin add w15,w15,w8 877bc3d5698SJohn Baldwin add w20,w20,w24 // d+=h 878bc3d5698SJohn Baldwin add w24,w24,w19 // h+=Maj(a,b,c) 879bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 880bc3d5698SJohn Baldwin add w15,w15,w5 881bc3d5698SJohn Baldwin add w24,w24,w17 // h+=Sigma0(a) 882bc3d5698SJohn Baldwin add w15,w15,w4 883bc3d5698SJohn Baldwin ldr w4,[sp,#4] 884bc3d5698SJohn Baldwin str w7,[sp,#0] 885bc3d5698SJohn Baldwin ror w16,w20,#6 886bc3d5698SJohn Baldwin add w23,w23,w19 // h+=K[i] 887bc3d5698SJohn Baldwin ror w6,w1,#7 888bc3d5698SJohn Baldwin and w17,w21,w20 889bc3d5698SJohn Baldwin ror w5,w14,#17 890bc3d5698SJohn Baldwin bic w19,w22,w20 891bc3d5698SJohn Baldwin ror w7,w24,#2 892bc3d5698SJohn Baldwin add w23,w23,w15 // h+=X[i] 893bc3d5698SJohn Baldwin eor w16,w16,w20,ror#11 894bc3d5698SJohn Baldwin eor w6,w6,w1,ror#18 895bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 896bc3d5698SJohn Baldwin eor w19,w24,w25 // a^b, b^c in next round 897bc3d5698SJohn Baldwin eor w16,w16,w20,ror#25 // Sigma1(e) 898bc3d5698SJohn Baldwin eor w7,w7,w24,ror#13 899bc3d5698SJohn Baldwin add w23,w23,w17 // h+=Ch(e,f,g) 900bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 901bc3d5698SJohn Baldwin eor w5,w5,w14,ror#19 902bc3d5698SJohn Baldwin eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) 903bc3d5698SJohn Baldwin add w23,w23,w16 // h+=Sigma1(e) 904bc3d5698SJohn Baldwin eor w28,w28,w25 // Maj(a,b,c) 905bc3d5698SJohn Baldwin eor w17,w7,w24,ror#22 // Sigma0(a) 906bc3d5698SJohn Baldwin eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) 907bc3d5698SJohn Baldwin add w0,w0,w9 908bc3d5698SJohn Baldwin add w27,w27,w23 // d+=h 909bc3d5698SJohn Baldwin add w23,w23,w28 // h+=Maj(a,b,c) 910bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 911bc3d5698SJohn Baldwin add 
w0,w0,w6 912bc3d5698SJohn Baldwin add w23,w23,w17 // h+=Sigma0(a) 913bc3d5698SJohn Baldwin add w0,w0,w5 914bc3d5698SJohn Baldwin ldr w5,[sp,#8] 915bc3d5698SJohn Baldwin str w8,[sp,#4] 916bc3d5698SJohn Baldwin ror w16,w27,#6 917bc3d5698SJohn Baldwin add w22,w22,w28 // h+=K[i] 918bc3d5698SJohn Baldwin ror w7,w2,#7 919bc3d5698SJohn Baldwin and w17,w20,w27 920bc3d5698SJohn Baldwin ror w6,w15,#17 921bc3d5698SJohn Baldwin bic w28,w21,w27 922bc3d5698SJohn Baldwin ror w8,w23,#2 923bc3d5698SJohn Baldwin add w22,w22,w0 // h+=X[i] 924bc3d5698SJohn Baldwin eor w16,w16,w27,ror#11 925bc3d5698SJohn Baldwin eor w7,w7,w2,ror#18 926bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 927bc3d5698SJohn Baldwin eor w28,w23,w24 // a^b, b^c in next round 928bc3d5698SJohn Baldwin eor w16,w16,w27,ror#25 // Sigma1(e) 929bc3d5698SJohn Baldwin eor w8,w8,w23,ror#13 930bc3d5698SJohn Baldwin add w22,w22,w17 // h+=Ch(e,f,g) 931bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 932bc3d5698SJohn Baldwin eor w6,w6,w15,ror#19 933bc3d5698SJohn Baldwin eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) 934bc3d5698SJohn Baldwin add w22,w22,w16 // h+=Sigma1(e) 935bc3d5698SJohn Baldwin eor w19,w19,w24 // Maj(a,b,c) 936bc3d5698SJohn Baldwin eor w17,w8,w23,ror#22 // Sigma0(a) 937bc3d5698SJohn Baldwin eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) 938bc3d5698SJohn Baldwin add w1,w1,w10 939bc3d5698SJohn Baldwin add w26,w26,w22 // d+=h 940bc3d5698SJohn Baldwin add w22,w22,w19 // h+=Maj(a,b,c) 941bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 942bc3d5698SJohn Baldwin add w1,w1,w7 943bc3d5698SJohn Baldwin add w22,w22,w17 // h+=Sigma0(a) 944bc3d5698SJohn Baldwin add w1,w1,w6 945bc3d5698SJohn Baldwin ldr w6,[sp,#12] 946bc3d5698SJohn Baldwin str w9,[sp,#8] 947bc3d5698SJohn Baldwin ror w16,w26,#6 948bc3d5698SJohn Baldwin add w21,w21,w19 // h+=K[i] 949bc3d5698SJohn Baldwin ror w8,w3,#7 950bc3d5698SJohn Baldwin and w17,w27,w26 951bc3d5698SJohn Baldwin ror w7,w0,#17 952bc3d5698SJohn Baldwin bic w19,w20,w26 
953bc3d5698SJohn Baldwin ror w9,w22,#2 954bc3d5698SJohn Baldwin add w21,w21,w1 // h+=X[i] 955bc3d5698SJohn Baldwin eor w16,w16,w26,ror#11 956bc3d5698SJohn Baldwin eor w8,w8,w3,ror#18 957bc3d5698SJohn Baldwin orr w17,w17,w19 // Ch(e,f,g) 958bc3d5698SJohn Baldwin eor w19,w22,w23 // a^b, b^c in next round 959bc3d5698SJohn Baldwin eor w16,w16,w26,ror#25 // Sigma1(e) 960bc3d5698SJohn Baldwin eor w9,w9,w22,ror#13 961bc3d5698SJohn Baldwin add w21,w21,w17 // h+=Ch(e,f,g) 962bc3d5698SJohn Baldwin and w28,w28,w19 // (b^c)&=(a^b) 963bc3d5698SJohn Baldwin eor w7,w7,w0,ror#19 964bc3d5698SJohn Baldwin eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) 965bc3d5698SJohn Baldwin add w21,w21,w16 // h+=Sigma1(e) 966bc3d5698SJohn Baldwin eor w28,w28,w23 // Maj(a,b,c) 967bc3d5698SJohn Baldwin eor w17,w9,w22,ror#22 // Sigma0(a) 968bc3d5698SJohn Baldwin eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) 969bc3d5698SJohn Baldwin add w2,w2,w11 970bc3d5698SJohn Baldwin add w25,w25,w21 // d+=h 971bc3d5698SJohn Baldwin add w21,w21,w28 // h+=Maj(a,b,c) 972bc3d5698SJohn Baldwin ldr w28,[x30],#4 // *K++, w19 in next round 973bc3d5698SJohn Baldwin add w2,w2,w8 974bc3d5698SJohn Baldwin add w21,w21,w17 // h+=Sigma0(a) 975bc3d5698SJohn Baldwin add w2,w2,w7 976bc3d5698SJohn Baldwin ldr w7,[sp,#0] 977bc3d5698SJohn Baldwin str w10,[sp,#12] 978bc3d5698SJohn Baldwin ror w16,w25,#6 979bc3d5698SJohn Baldwin add w20,w20,w28 // h+=K[i] 980bc3d5698SJohn Baldwin ror w9,w4,#7 981bc3d5698SJohn Baldwin and w17,w26,w25 982bc3d5698SJohn Baldwin ror w8,w1,#17 983bc3d5698SJohn Baldwin bic w28,w27,w25 984bc3d5698SJohn Baldwin ror w10,w21,#2 985bc3d5698SJohn Baldwin add w20,w20,w2 // h+=X[i] 986bc3d5698SJohn Baldwin eor w16,w16,w25,ror#11 987bc3d5698SJohn Baldwin eor w9,w9,w4,ror#18 988bc3d5698SJohn Baldwin orr w17,w17,w28 // Ch(e,f,g) 989bc3d5698SJohn Baldwin eor w28,w21,w22 // a^b, b^c in next round 990bc3d5698SJohn Baldwin eor w16,w16,w25,ror#25 // Sigma1(e) 991bc3d5698SJohn Baldwin eor w10,w10,w21,ror#13 992bc3d5698SJohn Baldwin add 
w20,w20,w17 // h+=Ch(e,f,g) 993bc3d5698SJohn Baldwin and w19,w19,w28 // (b^c)&=(a^b) 994bc3d5698SJohn Baldwin eor w8,w8,w1,ror#19 995bc3d5698SJohn Baldwin eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) 996bc3d5698SJohn Baldwin add w20,w20,w16 // h+=Sigma1(e) 997bc3d5698SJohn Baldwin eor w19,w19,w22 // Maj(a,b,c) 998bc3d5698SJohn Baldwin eor w17,w10,w21,ror#22 // Sigma0(a) 999bc3d5698SJohn Baldwin eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) 1000bc3d5698SJohn Baldwin add w3,w3,w12 1001bc3d5698SJohn Baldwin add w24,w24,w20 // d+=h 1002bc3d5698SJohn Baldwin add w20,w20,w19 // h+=Maj(a,b,c) 1003bc3d5698SJohn Baldwin ldr w19,[x30],#4 // *K++, w28 in next round 1004bc3d5698SJohn Baldwin add w3,w3,w9 1005bc3d5698SJohn Baldwin add w20,w20,w17 // h+=Sigma0(a) 1006bc3d5698SJohn Baldwin add w3,w3,w8 1007bc3d5698SJohn Baldwin cbnz w19,.Loop_16_xx 1008bc3d5698SJohn Baldwin 1009bc3d5698SJohn Baldwin ldp x0,x2,[x29,#96] 1010bc3d5698SJohn Baldwin ldr x1,[x29,#112] 1011bc3d5698SJohn Baldwin sub x30,x30,#260 // rewind 1012bc3d5698SJohn Baldwin 1013bc3d5698SJohn Baldwin ldp w3,w4,[x0] 1014bc3d5698SJohn Baldwin ldp w5,w6,[x0,#2*4] 1015bc3d5698SJohn Baldwin add x1,x1,#14*4 // advance input pointer 1016bc3d5698SJohn Baldwin ldp w7,w8,[x0,#4*4] 1017bc3d5698SJohn Baldwin add w20,w20,w3 1018bc3d5698SJohn Baldwin ldp w9,w10,[x0,#6*4] 1019bc3d5698SJohn Baldwin add w21,w21,w4 1020bc3d5698SJohn Baldwin add w22,w22,w5 1021bc3d5698SJohn Baldwin add w23,w23,w6 1022bc3d5698SJohn Baldwin stp w20,w21,[x0] 1023bc3d5698SJohn Baldwin add w24,w24,w7 1024bc3d5698SJohn Baldwin add w25,w25,w8 1025bc3d5698SJohn Baldwin stp w22,w23,[x0,#2*4] 1026bc3d5698SJohn Baldwin add w26,w26,w9 1027bc3d5698SJohn Baldwin add w27,w27,w10 1028bc3d5698SJohn Baldwin cmp x1,x2 1029bc3d5698SJohn Baldwin stp w24,w25,[x0,#4*4] 1030bc3d5698SJohn Baldwin stp w26,w27,[x0,#6*4] 1031bc3d5698SJohn Baldwin b.ne .Loop 1032bc3d5698SJohn Baldwin 1033bc3d5698SJohn Baldwin ldp x19,x20,[x29,#16] 1034bc3d5698SJohn Baldwin add sp,sp,#4*4 1035bc3d5698SJohn 
Baldwin ldp x21,x22,[x29,#32] 1036bc3d5698SJohn Baldwin ldp x23,x24,[x29,#48] 1037bc3d5698SJohn Baldwin ldp x25,x26,[x29,#64] 1038bc3d5698SJohn Baldwin ldp x27,x28,[x29,#80] 1039bc3d5698SJohn Baldwin ldp x29,x30,[sp],#128 1040bd9588bcSAndrew Turner AARCH64_VALIDATE_LINK_REGISTER 1041bc3d5698SJohn Baldwin ret 1042bc3d5698SJohn Baldwin.size sha256_block_data_order,.-sha256_block_data_order 1043bc3d5698SJohn Baldwin /* .LK256: table of sixty-four 32-bit constants (16 rows of 4) - the K[] values that the round code adds with "h+=K[i]" - followed by a single zero word. The zero word is the loop sentinel of the scalar path, which tests each freshly loaded K word with cbnz and rewinds its pointer by 260 bytes = 65 words. ".align 6" requests 2^6 = 64-byte alignment. The .byte directive after the table holds a NUL-terminated ASCII identification string ("SHA256 block transform for ARMv8, CRYPTOGAMS by ..."). */ 1044bc3d5698SJohn Baldwin.align 6 1045bc3d5698SJohn Baldwin.type .LK256,%object 1046bc3d5698SJohn Baldwin.LK256: 1047bc3d5698SJohn Baldwin.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 1048bc3d5698SJohn Baldwin.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 1049bc3d5698SJohn Baldwin.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 1050bc3d5698SJohn Baldwin.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 1051bc3d5698SJohn Baldwin.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc 1052bc3d5698SJohn Baldwin.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da 1053bc3d5698SJohn Baldwin.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 1054bc3d5698SJohn Baldwin.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 1055bc3d5698SJohn Baldwin.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 1056bc3d5698SJohn Baldwin.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 1057bc3d5698SJohn Baldwin.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 1058bc3d5698SJohn Baldwin.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 1059bc3d5698SJohn Baldwin.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 1060bc3d5698SJohn Baldwin.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 1061bc3d5698SJohn Baldwin.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 1062bc3d5698SJohn Baldwin.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 1063bc3d5698SJohn Baldwin.long 0 //terminator 1064bc3d5698SJohn Baldwin.size .LK256,.-.LK256 1065bc3d5698SJohn Baldwin.byte
83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 1066bc3d5698SJohn Baldwin.align 2 1067bc3d5698SJohn Baldwin.align 2 1068bc3d5698SJohn Baldwin#ifndef __KERNEL__ /* sha256_block_armv8: SHA-256 block function built on the ARMv8 SHA-256 instructions, which are emitted as raw .inst words with the mnemonic given in the trailing comment. Assembled only when __KERNEL__ is undefined; no .globl for it is visible in this part of the file. In: x0 = address of the eight 32-bit state words (loaded into v0/v1, stored back on exit); x1 = input, read 64 bytes per iteration; x2 = number of 64-byte blocks. Writes x1-x3, v0-v2, v4-v7 and v16-v19; x29 is saved and restored. NOTE(review): .Lv8_entry is a local label and its caller is outside this view - presumably the capability dispatch in sha256_block_data_order; confirm. */ 1069bc3d5698SJohn Baldwin.type sha256_block_armv8,%function 1070bc3d5698SJohn Baldwin.align 6 1071bc3d5698SJohn Baldwinsha256_block_armv8: 1072bc3d5698SJohn Baldwin.Lv8_entry: 1073bd9588bcSAndrew Turner // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. 1074bc3d5698SJohn Baldwin stp x29,x30,[sp,#-16]! 1075bc3d5698SJohn Baldwin add x29,sp,#0 1076bc3d5698SJohn Baldwin 1077bc3d5698SJohn Baldwin ld1 {v0.4s,v1.4s},[x0] 1078bc3d5698SJohn Baldwin adr x3,.LK256 1079bc3d5698SJohn Baldwin /* One iteration per 64-byte block. The loop is bottom-tested (sub x2,x2,#1 here, cbnz x2 at the end), so x2 must be non-zero on entry. rev32 byte-swaps every 32-bit word of the input; v18/v19 keep a copy of the incoming state for the final add; x3 walks .LK256 16 bytes at a time. */ 1080bc3d5698SJohn Baldwin.Loop_hw: 1081bc3d5698SJohn Baldwin ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 1082bc3d5698SJohn Baldwin sub x2,x2,#1 1083bc3d5698SJohn Baldwin ld1 {v16.4s},[x3],#16 1084bc3d5698SJohn Baldwin rev32 v4.16b,v4.16b 1085bc3d5698SJohn Baldwin rev32 v5.16b,v5.16b 1086bc3d5698SJohn Baldwin rev32 v6.16b,v6.16b 1087bc3d5698SJohn Baldwin rev32 v7.16b,v7.16b 1088bc3d5698SJohn Baldwin orr v18.16b,v0.16b,v0.16b // offload 1089bc3d5698SJohn Baldwin orr v19.16b,v1.16b,v1.16b 1090bc3d5698SJohn Baldwin ld1 {v17.4s},[x3],#16 1091bc3d5698SJohn Baldwin add v16.4s,v16.4s,v4.4s 1092bc3d5698SJohn Baldwin.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b 1093bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1094bc3d5698SJohn Baldwin.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s 1095bc3d5698SJohn Baldwin.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s 1096bc3d5698SJohn Baldwin.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b 1097bc3d5698SJohn Baldwin ld1 {v16.4s},[x3],#16 1098bc3d5698SJohn Baldwin add v17.4s,v17.4s,v5.4s 1099bc3d5698SJohn Baldwin.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b 1100bc3d5698SJohn Baldwin orr
v2.16b,v0.16b,v0.16b 1101bc3d5698SJohn Baldwin.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s 1102bc3d5698SJohn Baldwin.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s 1103bc3d5698SJohn Baldwin.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b 1104bc3d5698SJohn Baldwin ld1 {v17.4s},[x3],#16 1105bc3d5698SJohn Baldwin add v16.4s,v16.4s,v6.4s 1106bc3d5698SJohn Baldwin.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b 1107bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1108bc3d5698SJohn Baldwin.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s 1109bc3d5698SJohn Baldwin.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s 1110bc3d5698SJohn Baldwin.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b 1111bc3d5698SJohn Baldwin ld1 {v16.4s},[x3],#16 1112bc3d5698SJohn Baldwin add v17.4s,v17.4s,v7.4s 1113bc3d5698SJohn Baldwin.inst 0x5e282887 //sha256su0 v7.16b,v4.16b 1114bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1115bc3d5698SJohn Baldwin.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s 1116bc3d5698SJohn Baldwin.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s 1117bc3d5698SJohn Baldwin.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b 1118bc3d5698SJohn Baldwin ld1 {v17.4s},[x3],#16 1119bc3d5698SJohn Baldwin add v16.4s,v16.4s,v4.4s 1120bc3d5698SJohn Baldwin.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b 1121bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1122bc3d5698SJohn Baldwin.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s 1123bc3d5698SJohn Baldwin.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s 1124bc3d5698SJohn Baldwin.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b 1125bc3d5698SJohn Baldwin ld1 {v16.4s},[x3],#16 1126bc3d5698SJohn Baldwin add v17.4s,v17.4s,v5.4s 1127bc3d5698SJohn Baldwin.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b 1128bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1129bc3d5698SJohn Baldwin.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s 1130bc3d5698SJohn Baldwin.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s 1131bc3d5698SJohn Baldwin.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
1132bc3d5698SJohn Baldwin ld1 {v17.4s},[x3],#16 1133bc3d5698SJohn Baldwin add v16.4s,v16.4s,v6.4s 1134bc3d5698SJohn Baldwin.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b 1135bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1136bc3d5698SJohn Baldwin.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s 1137bc3d5698SJohn Baldwin.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s 1138bc3d5698SJohn Baldwin.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b 1139bc3d5698SJohn Baldwin ld1 {v16.4s},[x3],#16 1140bc3d5698SJohn Baldwin add v17.4s,v17.4s,v7.4s 1141bc3d5698SJohn Baldwin.inst 0x5e282887 //sha256su0 v7.16b,v4.16b 1142bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1143bc3d5698SJohn Baldwin.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s 1144bc3d5698SJohn Baldwin.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s 1145bc3d5698SJohn Baldwin.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b 1146bc3d5698SJohn Baldwin ld1 {v17.4s},[x3],#16 1147bc3d5698SJohn Baldwin add v16.4s,v16.4s,v4.4s 1148bc3d5698SJohn Baldwin.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b 1149bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1150bc3d5698SJohn Baldwin.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s 1151bc3d5698SJohn Baldwin.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s 1152bc3d5698SJohn Baldwin.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b 1153bc3d5698SJohn Baldwin ld1 {v16.4s},[x3],#16 1154bc3d5698SJohn Baldwin add v17.4s,v17.4s,v5.4s 1155bc3d5698SJohn Baldwin.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b 1156bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1157bc3d5698SJohn Baldwin.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s 1158bc3d5698SJohn Baldwin.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s 1159bc3d5698SJohn Baldwin.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b 1160bc3d5698SJohn Baldwin ld1 {v17.4s},[x3],#16 1161bc3d5698SJohn Baldwin add v16.4s,v16.4s,v6.4s 1162bc3d5698SJohn Baldwin.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b 1163bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1164bc3d5698SJohn
Baldwin.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s 1165bc3d5698SJohn Baldwin.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s 1166bc3d5698SJohn Baldwin.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b 1167bc3d5698SJohn Baldwin ld1 {v16.4s},[x3],#16 1168bc3d5698SJohn Baldwin add v17.4s,v17.4s,v7.4s 1169bc3d5698SJohn Baldwin.inst 0x5e282887 //sha256su0 v7.16b,v4.16b 1170bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1171bc3d5698SJohn Baldwin.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s 1172bc3d5698SJohn Baldwin.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s 1173bc3d5698SJohn Baldwin.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b 1174bc3d5698SJohn Baldwin ld1 {v17.4s},[x3],#16 1175bc3d5698SJohn Baldwin add v16.4s,v16.4s,v4.4s 1176bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1177bc3d5698SJohn Baldwin.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s 1178bc3d5698SJohn Baldwin.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s 1179bc3d5698SJohn Baldwin /* From the group just above onwards sha256su0/sha256su1 are no longer issued: the last four groups only add a constant vector to v4-v7 and run sha256h/sha256h2. The final constant load has no post-increment, so x3 has advanced 15*16 = 240 bytes by then, which is exactly what the 64*4-16 rewind undoes. */ 1180bc3d5698SJohn Baldwin ld1 {v16.4s},[x3],#16 1181bc3d5698SJohn Baldwin add v17.4s,v17.4s,v5.4s 1182bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1183bc3d5698SJohn Baldwin.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s 1184bc3d5698SJohn Baldwin.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s 1185bc3d5698SJohn Baldwin 1186bc3d5698SJohn Baldwin ld1 {v17.4s},[x3] 1187bc3d5698SJohn Baldwin add v16.4s,v16.4s,v6.4s 1188bc3d5698SJohn Baldwin sub x3,x3,#64*4-16 // rewind 1189bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1190bc3d5698SJohn Baldwin.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s 1191bc3d5698SJohn Baldwin.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s 1192bc3d5698SJohn Baldwin 1193bc3d5698SJohn Baldwin add v17.4s,v17.4s,v7.4s 1194bc3d5698SJohn Baldwin orr v2.16b,v0.16b,v0.16b 1195bc3d5698SJohn Baldwin.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s 1196bc3d5698SJohn Baldwin.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s 1197bc3d5698SJohn Baldwin 1198bc3d5698SJohn Baldwin add v0.4s,v0.4s,v18.4s
1199bc3d5698SJohn Baldwin add v1.4s,v1.4s,v19.4s 1200bc3d5698SJohn Baldwin 1201bc3d5698SJohn Baldwin cbnz x2,.Loop_hw 1202bc3d5698SJohn Baldwin 1203bc3d5698SJohn Baldwin st1 {v0.4s,v1.4s},[x0] 1204bc3d5698SJohn Baldwin /* Epilogue: pop the 16-byte frame, reloading only x29. x30 is not written anywhere in this function, so its saved copy is not needed before ret. */ 1205bc3d5698SJohn Baldwin ldr x29,[sp],#16 1206bc3d5698SJohn Baldwin ret 1207bc3d5698SJohn Baldwin.size sha256_block_armv8,.-sha256_block_armv8 1208bc3d5698SJohn Baldwin#endif 1209bc3d5698SJohn Baldwin#ifdef __KERNEL__ 1210bc3d5698SJohn Baldwin.globl sha256_block_neon 1211bc3d5698SJohn Baldwin#endif 1212bc3d5698SJohn Baldwin.type sha256_block_neon,%function 1213bc3d5698SJohn Baldwin.align 4 1214bc3d5698SJohn Baldwinsha256_block_neon: 1215bd9588bcSAndrew Turner AARCH64_VALID_CALL_TARGET 1216bc3d5698SJohn Baldwin.Lneon_entry: 1217bd9588bcSAndrew Turner // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later 1218bc3d5698SJohn Baldwin stp x29, x30, [sp, #-16]! 1219bc3d5698SJohn Baldwin mov x29, sp 1220bc3d5698SJohn Baldwin sub sp,sp,#16*4 1221bc3d5698SJohn Baldwin 1222bc3d5698SJohn Baldwin adr x16,.LK256 1223bc3d5698SJohn Baldwin add x2,x1,x2,lsl#6 // len to point at the end of inp 1224bc3d5698SJohn Baldwin 1225bc3d5698SJohn Baldwin ld1 {v0.16b},[x1], #16 1226bc3d5698SJohn Baldwin ld1 {v1.16b},[x1], #16 1227bc3d5698SJohn Baldwin ld1 {v2.16b},[x1], #16 1228bc3d5698SJohn Baldwin ld1 {v3.16b},[x1], #16 1229bc3d5698SJohn Baldwin ld1 {v4.4s},[x16], #16 1230bc3d5698SJohn Baldwin ld1 {v5.4s},[x16], #16 1231bc3d5698SJohn Baldwin ld1 {v6.4s},[x16], #16 1232bc3d5698SJohn Baldwin ld1 {v7.4s},[x16], #16 1233bc3d5698SJohn Baldwin rev32 v0.16b,v0.16b // yes, even on 1234bc3d5698SJohn Baldwin rev32 v1.16b,v1.16b // big-endian 1235bc3d5698SJohn Baldwin rev32 v2.16b,v2.16b 1236bc3d5698SJohn Baldwin rev32 v3.16b,v3.16b 1237bc3d5698SJohn Baldwin mov x17,sp 1238bc3d5698SJohn Baldwin add v4.4s,v4.4s,v0.4s 1239bc3d5698SJohn Baldwin add v5.4s,v5.4s,v1.4s 1240bc3d5698SJohn Baldwin add v6.4s,v6.4s,v2.4s 1241bc3d5698SJohn Baldwin st1 {v4.4s,v5.4s},[x17], #32
1242bc3d5698SJohn Baldwin add v7.4s,v7.4s,v3.4s 1243bc3d5698SJohn Baldwin st1 {v6.4s,v7.4s},[x17] 1244bc3d5698SJohn Baldwin sub x17,x17,#32 1245bc3d5698SJohn Baldwin 1246bc3d5698SJohn Baldwin ldp w3,w4,[x0] 1247bc3d5698SJohn Baldwin ldp w5,w6,[x0,#8] 1248bc3d5698SJohn Baldwin ldp w7,w8,[x0,#16] 1249bc3d5698SJohn Baldwin ldp w9,w10,[x0,#24] 1250bc3d5698SJohn Baldwin ldr w12,[sp,#0] 1251bc3d5698SJohn Baldwin mov w13,wzr 1252bc3d5698SJohn Baldwin eor w14,w4,w5 1253bc3d5698SJohn Baldwin mov w15,wzr 1254bc3d5698SJohn Baldwin b .L_00_48 1255bc3d5698SJohn Baldwin 1256bc3d5698SJohn Baldwin.align 4 1257bc3d5698SJohn Baldwin.L_00_48: 1258bc3d5698SJohn Baldwin ext v4.16b,v0.16b,v1.16b,#4 1259bc3d5698SJohn Baldwin add w10,w10,w12 1260bc3d5698SJohn Baldwin add w3,w3,w15 1261bc3d5698SJohn Baldwin and w12,w8,w7 1262bc3d5698SJohn Baldwin bic w15,w9,w7 1263bc3d5698SJohn Baldwin ext v7.16b,v2.16b,v3.16b,#4 1264bc3d5698SJohn Baldwin eor w11,w7,w7,ror#5 1265bc3d5698SJohn Baldwin add w3,w3,w13 1266bc3d5698SJohn Baldwin mov d19,v3.d[1] 1267bc3d5698SJohn Baldwin orr w12,w12,w15 1268bc3d5698SJohn Baldwin eor w11,w11,w7,ror#19 1269bc3d5698SJohn Baldwin ushr v6.4s,v4.4s,#7 1270bc3d5698SJohn Baldwin eor w15,w3,w3,ror#11 1271bc3d5698SJohn Baldwin ushr v5.4s,v4.4s,#3 1272bc3d5698SJohn Baldwin add w10,w10,w12 1273bc3d5698SJohn Baldwin add v0.4s,v0.4s,v7.4s 1274bc3d5698SJohn Baldwin ror w11,w11,#6 1275bc3d5698SJohn Baldwin sli v6.4s,v4.4s,#25 1276bc3d5698SJohn Baldwin eor w13,w3,w4 1277bc3d5698SJohn Baldwin eor w15,w15,w3,ror#20 1278bc3d5698SJohn Baldwin ushr v7.4s,v4.4s,#18 1279bc3d5698SJohn Baldwin add w10,w10,w11 1280bc3d5698SJohn Baldwin ldr w12,[sp,#4] 1281bc3d5698SJohn Baldwin and w14,w14,w13 1282bc3d5698SJohn Baldwin eor v5.16b,v5.16b,v6.16b 1283bc3d5698SJohn Baldwin ror w15,w15,#2 1284bc3d5698SJohn Baldwin add w6,w6,w10 1285bc3d5698SJohn Baldwin sli v7.4s,v4.4s,#14 1286bc3d5698SJohn Baldwin eor w14,w14,w4 1287bc3d5698SJohn Baldwin ushr v16.4s,v19.4s,#17 1288bc3d5698SJohn Baldwin add 
w9,w9,w12 1289bc3d5698SJohn Baldwin add w10,w10,w15 1290bc3d5698SJohn Baldwin and w12,w7,w6 1291bc3d5698SJohn Baldwin eor v5.16b,v5.16b,v7.16b 1292bc3d5698SJohn Baldwin bic w15,w8,w6 1293bc3d5698SJohn Baldwin eor w11,w6,w6,ror#5 1294bc3d5698SJohn Baldwin sli v16.4s,v19.4s,#15 1295bc3d5698SJohn Baldwin add w10,w10,w14 1296bc3d5698SJohn Baldwin orr w12,w12,w15 1297bc3d5698SJohn Baldwin ushr v17.4s,v19.4s,#10 1298bc3d5698SJohn Baldwin eor w11,w11,w6,ror#19 1299bc3d5698SJohn Baldwin eor w15,w10,w10,ror#11 1300bc3d5698SJohn Baldwin ushr v7.4s,v19.4s,#19 1301bc3d5698SJohn Baldwin add w9,w9,w12 1302bc3d5698SJohn Baldwin ror w11,w11,#6 1303bc3d5698SJohn Baldwin add v0.4s,v0.4s,v5.4s 1304bc3d5698SJohn Baldwin eor w14,w10,w3 1305bc3d5698SJohn Baldwin eor w15,w15,w10,ror#20 1306bc3d5698SJohn Baldwin sli v7.4s,v19.4s,#13 1307bc3d5698SJohn Baldwin add w9,w9,w11 1308bc3d5698SJohn Baldwin ldr w12,[sp,#8] 1309bc3d5698SJohn Baldwin and w13,w13,w14 1310bc3d5698SJohn Baldwin eor v17.16b,v17.16b,v16.16b 1311bc3d5698SJohn Baldwin ror w15,w15,#2 1312bc3d5698SJohn Baldwin add w5,w5,w9 1313bc3d5698SJohn Baldwin eor w13,w13,w3 1314bc3d5698SJohn Baldwin eor v17.16b,v17.16b,v7.16b 1315bc3d5698SJohn Baldwin add w8,w8,w12 1316bc3d5698SJohn Baldwin add w9,w9,w15 1317bc3d5698SJohn Baldwin and w12,w6,w5 1318bc3d5698SJohn Baldwin add v0.4s,v0.4s,v17.4s 1319bc3d5698SJohn Baldwin bic w15,w7,w5 1320bc3d5698SJohn Baldwin eor w11,w5,w5,ror#5 1321bc3d5698SJohn Baldwin add w9,w9,w13 1322bc3d5698SJohn Baldwin ushr v18.4s,v0.4s,#17 1323bc3d5698SJohn Baldwin orr w12,w12,w15 1324bc3d5698SJohn Baldwin ushr v19.4s,v0.4s,#10 1325bc3d5698SJohn Baldwin eor w11,w11,w5,ror#19 1326bc3d5698SJohn Baldwin eor w15,w9,w9,ror#11 1327bc3d5698SJohn Baldwin sli v18.4s,v0.4s,#15 1328bc3d5698SJohn Baldwin add w8,w8,w12 1329bc3d5698SJohn Baldwin ushr v17.4s,v0.4s,#19 1330bc3d5698SJohn Baldwin ror w11,w11,#6 1331bc3d5698SJohn Baldwin eor w13,w9,w10 1332bc3d5698SJohn Baldwin eor v19.16b,v19.16b,v18.16b 1333bc3d5698SJohn Baldwin 
eor w15,w15,w9,ror#20 1334bc3d5698SJohn Baldwin add w8,w8,w11 1335bc3d5698SJohn Baldwin sli v17.4s,v0.4s,#13 1336bc3d5698SJohn Baldwin ldr w12,[sp,#12] 1337bc3d5698SJohn Baldwin and w14,w14,w13 1338bc3d5698SJohn Baldwin ror w15,w15,#2 1339bc3d5698SJohn Baldwin ld1 {v4.4s},[x16], #16 1340bc3d5698SJohn Baldwin add w4,w4,w8 1341bc3d5698SJohn Baldwin eor v19.16b,v19.16b,v17.16b 1342bc3d5698SJohn Baldwin eor w14,w14,w10 1343bc3d5698SJohn Baldwin eor v17.16b,v17.16b,v17.16b 1344bc3d5698SJohn Baldwin add w7,w7,w12 1345bc3d5698SJohn Baldwin add w8,w8,w15 1346bc3d5698SJohn Baldwin and w12,w5,w4 1347bc3d5698SJohn Baldwin mov v17.d[1],v19.d[0] 1348bc3d5698SJohn Baldwin bic w15,w6,w4 1349bc3d5698SJohn Baldwin eor w11,w4,w4,ror#5 1350bc3d5698SJohn Baldwin add w8,w8,w14 1351bc3d5698SJohn Baldwin add v0.4s,v0.4s,v17.4s 1352bc3d5698SJohn Baldwin orr w12,w12,w15 1353bc3d5698SJohn Baldwin eor w11,w11,w4,ror#19 1354bc3d5698SJohn Baldwin eor w15,w8,w8,ror#11 1355bc3d5698SJohn Baldwin add v4.4s,v4.4s,v0.4s 1356bc3d5698SJohn Baldwin add w7,w7,w12 1357bc3d5698SJohn Baldwin ror w11,w11,#6 1358bc3d5698SJohn Baldwin eor w14,w8,w9 1359bc3d5698SJohn Baldwin eor w15,w15,w8,ror#20 1360bc3d5698SJohn Baldwin add w7,w7,w11 1361bc3d5698SJohn Baldwin ldr w12,[sp,#16] 1362bc3d5698SJohn Baldwin and w13,w13,w14 1363bc3d5698SJohn Baldwin ror w15,w15,#2 1364bc3d5698SJohn Baldwin add w3,w3,w7 1365bc3d5698SJohn Baldwin eor w13,w13,w9 1366bc3d5698SJohn Baldwin st1 {v4.4s},[x17], #16 1367bc3d5698SJohn Baldwin ext v4.16b,v1.16b,v2.16b,#4 1368bc3d5698SJohn Baldwin add w6,w6,w12 1369bc3d5698SJohn Baldwin add w7,w7,w15 1370bc3d5698SJohn Baldwin and w12,w4,w3 1371bc3d5698SJohn Baldwin bic w15,w5,w3 1372bc3d5698SJohn Baldwin ext v7.16b,v3.16b,v0.16b,#4 1373bc3d5698SJohn Baldwin eor w11,w3,w3,ror#5 1374bc3d5698SJohn Baldwin add w7,w7,w13 1375bc3d5698SJohn Baldwin mov d19,v0.d[1] 1376bc3d5698SJohn Baldwin orr w12,w12,w15 1377bc3d5698SJohn Baldwin eor w11,w11,w3,ror#19 1378bc3d5698SJohn Baldwin ushr v6.4s,v4.4s,#7 
1379bc3d5698SJohn Baldwin eor w15,w7,w7,ror#11 1380bc3d5698SJohn Baldwin ushr v5.4s,v4.4s,#3 1381bc3d5698SJohn Baldwin add w6,w6,w12 1382bc3d5698SJohn Baldwin add v1.4s,v1.4s,v7.4s 1383bc3d5698SJohn Baldwin ror w11,w11,#6 1384bc3d5698SJohn Baldwin sli v6.4s,v4.4s,#25 1385bc3d5698SJohn Baldwin eor w13,w7,w8 1386bc3d5698SJohn Baldwin eor w15,w15,w7,ror#20 1387bc3d5698SJohn Baldwin ushr v7.4s,v4.4s,#18 1388bc3d5698SJohn Baldwin add w6,w6,w11 1389bc3d5698SJohn Baldwin ldr w12,[sp,#20] 1390bc3d5698SJohn Baldwin and w14,w14,w13 1391bc3d5698SJohn Baldwin eor v5.16b,v5.16b,v6.16b 1392bc3d5698SJohn Baldwin ror w15,w15,#2 1393bc3d5698SJohn Baldwin add w10,w10,w6 1394bc3d5698SJohn Baldwin sli v7.4s,v4.4s,#14 1395bc3d5698SJohn Baldwin eor w14,w14,w8 1396bc3d5698SJohn Baldwin ushr v16.4s,v19.4s,#17 1397bc3d5698SJohn Baldwin add w5,w5,w12 1398bc3d5698SJohn Baldwin add w6,w6,w15 1399bc3d5698SJohn Baldwin and w12,w3,w10 1400bc3d5698SJohn Baldwin eor v5.16b,v5.16b,v7.16b 1401bc3d5698SJohn Baldwin bic w15,w4,w10 1402bc3d5698SJohn Baldwin eor w11,w10,w10,ror#5 1403bc3d5698SJohn Baldwin sli v16.4s,v19.4s,#15 1404bc3d5698SJohn Baldwin add w6,w6,w14 1405bc3d5698SJohn Baldwin orr w12,w12,w15 1406bc3d5698SJohn Baldwin ushr v17.4s,v19.4s,#10 1407bc3d5698SJohn Baldwin eor w11,w11,w10,ror#19 1408bc3d5698SJohn Baldwin eor w15,w6,w6,ror#11 1409bc3d5698SJohn Baldwin ushr v7.4s,v19.4s,#19 1410bc3d5698SJohn Baldwin add w5,w5,w12 1411bc3d5698SJohn Baldwin ror w11,w11,#6 1412bc3d5698SJohn Baldwin add v1.4s,v1.4s,v5.4s 1413bc3d5698SJohn Baldwin eor w14,w6,w7 1414bc3d5698SJohn Baldwin eor w15,w15,w6,ror#20 1415bc3d5698SJohn Baldwin sli v7.4s,v19.4s,#13 1416bc3d5698SJohn Baldwin add w5,w5,w11 1417bc3d5698SJohn Baldwin ldr w12,[sp,#24] 1418bc3d5698SJohn Baldwin and w13,w13,w14 1419bc3d5698SJohn Baldwin eor v17.16b,v17.16b,v16.16b 1420bc3d5698SJohn Baldwin ror w15,w15,#2 1421bc3d5698SJohn Baldwin add w9,w9,w5 1422bc3d5698SJohn Baldwin eor w13,w13,w7 1423bc3d5698SJohn Baldwin eor v17.16b,v17.16b,v7.16b 
1424bc3d5698SJohn Baldwin add w4,w4,w12 1425bc3d5698SJohn Baldwin add w5,w5,w15 1426bc3d5698SJohn Baldwin and w12,w10,w9 1427bc3d5698SJohn Baldwin add v1.4s,v1.4s,v17.4s 1428bc3d5698SJohn Baldwin bic w15,w3,w9 1429bc3d5698SJohn Baldwin eor w11,w9,w9,ror#5 1430bc3d5698SJohn Baldwin add w5,w5,w13 1431bc3d5698SJohn Baldwin ushr v18.4s,v1.4s,#17 1432bc3d5698SJohn Baldwin orr w12,w12,w15 1433bc3d5698SJohn Baldwin ushr v19.4s,v1.4s,#10 1434bc3d5698SJohn Baldwin eor w11,w11,w9,ror#19 1435bc3d5698SJohn Baldwin eor w15,w5,w5,ror#11 1436bc3d5698SJohn Baldwin sli v18.4s,v1.4s,#15 1437bc3d5698SJohn Baldwin add w4,w4,w12 1438bc3d5698SJohn Baldwin ushr v17.4s,v1.4s,#19 1439bc3d5698SJohn Baldwin ror w11,w11,#6 1440bc3d5698SJohn Baldwin eor w13,w5,w6 1441bc3d5698SJohn Baldwin eor v19.16b,v19.16b,v18.16b 1442bc3d5698SJohn Baldwin eor w15,w15,w5,ror#20 1443bc3d5698SJohn Baldwin add w4,w4,w11 1444bc3d5698SJohn Baldwin sli v17.4s,v1.4s,#13 1445bc3d5698SJohn Baldwin ldr w12,[sp,#28] 1446bc3d5698SJohn Baldwin and w14,w14,w13 1447bc3d5698SJohn Baldwin ror w15,w15,#2 1448bc3d5698SJohn Baldwin ld1 {v4.4s},[x16], #16 1449bc3d5698SJohn Baldwin add w8,w8,w4 1450bc3d5698SJohn Baldwin eor v19.16b,v19.16b,v17.16b 1451bc3d5698SJohn Baldwin eor w14,w14,w6 1452bc3d5698SJohn Baldwin eor v17.16b,v17.16b,v17.16b 1453bc3d5698SJohn Baldwin add w3,w3,w12 1454bc3d5698SJohn Baldwin add w4,w4,w15 1455bc3d5698SJohn Baldwin and w12,w9,w8 1456bc3d5698SJohn Baldwin mov v17.d[1],v19.d[0] 1457bc3d5698SJohn Baldwin bic w15,w10,w8 1458bc3d5698SJohn Baldwin eor w11,w8,w8,ror#5 1459bc3d5698SJohn Baldwin add w4,w4,w14 1460bc3d5698SJohn Baldwin add v1.4s,v1.4s,v17.4s 1461bc3d5698SJohn Baldwin orr w12,w12,w15 1462bc3d5698SJohn Baldwin eor w11,w11,w8,ror#19 1463bc3d5698SJohn Baldwin eor w15,w4,w4,ror#11 1464bc3d5698SJohn Baldwin add v4.4s,v4.4s,v1.4s 1465bc3d5698SJohn Baldwin add w3,w3,w12 1466bc3d5698SJohn Baldwin ror w11,w11,#6 1467bc3d5698SJohn Baldwin eor w14,w4,w5 1468bc3d5698SJohn Baldwin eor w15,w15,w4,ror#20 
1469bc3d5698SJohn Baldwin add w3,w3,w11 1470bc3d5698SJohn Baldwin ldr w12,[sp,#32] 1471bc3d5698SJohn Baldwin and w13,w13,w14 1472bc3d5698SJohn Baldwin ror w15,w15,#2 1473bc3d5698SJohn Baldwin add w7,w7,w3 1474bc3d5698SJohn Baldwin eor w13,w13,w5 1475bc3d5698SJohn Baldwin st1 {v4.4s},[x17], #16 1476bc3d5698SJohn Baldwin ext v4.16b,v2.16b,v3.16b,#4 1477bc3d5698SJohn Baldwin add w10,w10,w12 1478bc3d5698SJohn Baldwin add w3,w3,w15 1479bc3d5698SJohn Baldwin and w12,w8,w7 1480bc3d5698SJohn Baldwin bic w15,w9,w7 1481bc3d5698SJohn Baldwin ext v7.16b,v0.16b,v1.16b,#4 1482bc3d5698SJohn Baldwin eor w11,w7,w7,ror#5 1483bc3d5698SJohn Baldwin add w3,w3,w13 1484bc3d5698SJohn Baldwin mov d19,v1.d[1] 1485bc3d5698SJohn Baldwin orr w12,w12,w15 1486bc3d5698SJohn Baldwin eor w11,w11,w7,ror#19 1487bc3d5698SJohn Baldwin ushr v6.4s,v4.4s,#7 1488bc3d5698SJohn Baldwin eor w15,w3,w3,ror#11 1489bc3d5698SJohn Baldwin ushr v5.4s,v4.4s,#3 1490bc3d5698SJohn Baldwin add w10,w10,w12 1491bc3d5698SJohn Baldwin add v2.4s,v2.4s,v7.4s 1492bc3d5698SJohn Baldwin ror w11,w11,#6 1493bc3d5698SJohn Baldwin sli v6.4s,v4.4s,#25 1494bc3d5698SJohn Baldwin eor w13,w3,w4 1495bc3d5698SJohn Baldwin eor w15,w15,w3,ror#20 1496bc3d5698SJohn Baldwin ushr v7.4s,v4.4s,#18 1497bc3d5698SJohn Baldwin add w10,w10,w11 1498bc3d5698SJohn Baldwin ldr w12,[sp,#36] 1499bc3d5698SJohn Baldwin and w14,w14,w13 1500bc3d5698SJohn Baldwin eor v5.16b,v5.16b,v6.16b 1501bc3d5698SJohn Baldwin ror w15,w15,#2 1502bc3d5698SJohn Baldwin add w6,w6,w10 1503bc3d5698SJohn Baldwin sli v7.4s,v4.4s,#14 1504bc3d5698SJohn Baldwin eor w14,w14,w4 1505bc3d5698SJohn Baldwin ushr v16.4s,v19.4s,#17 1506bc3d5698SJohn Baldwin add w9,w9,w12 1507bc3d5698SJohn Baldwin add w10,w10,w15 1508bc3d5698SJohn Baldwin and w12,w7,w6 1509bc3d5698SJohn Baldwin eor v5.16b,v5.16b,v7.16b 1510bc3d5698SJohn Baldwin bic w15,w8,w6 1511bc3d5698SJohn Baldwin eor w11,w6,w6,ror#5 1512bc3d5698SJohn Baldwin sli v16.4s,v19.4s,#15 1513bc3d5698SJohn Baldwin add w10,w10,w14 1514bc3d5698SJohn 
Baldwin orr w12,w12,w15 1515bc3d5698SJohn Baldwin ushr v17.4s,v19.4s,#10 1516bc3d5698SJohn Baldwin eor w11,w11,w6,ror#19 1517bc3d5698SJohn Baldwin eor w15,w10,w10,ror#11 1518bc3d5698SJohn Baldwin ushr v7.4s,v19.4s,#19 1519bc3d5698SJohn Baldwin add w9,w9,w12 1520bc3d5698SJohn Baldwin ror w11,w11,#6 1521bc3d5698SJohn Baldwin add v2.4s,v2.4s,v5.4s 1522bc3d5698SJohn Baldwin eor w14,w10,w3 1523bc3d5698SJohn Baldwin eor w15,w15,w10,ror#20 1524bc3d5698SJohn Baldwin sli v7.4s,v19.4s,#13 1525bc3d5698SJohn Baldwin add w9,w9,w11 1526bc3d5698SJohn Baldwin ldr w12,[sp,#40] 1527bc3d5698SJohn Baldwin and w13,w13,w14 1528bc3d5698SJohn Baldwin eor v17.16b,v17.16b,v16.16b 1529bc3d5698SJohn Baldwin ror w15,w15,#2 1530bc3d5698SJohn Baldwin add w5,w5,w9 1531bc3d5698SJohn Baldwin eor w13,w13,w3 1532bc3d5698SJohn Baldwin eor v17.16b,v17.16b,v7.16b 1533bc3d5698SJohn Baldwin add w8,w8,w12 1534bc3d5698SJohn Baldwin add w9,w9,w15 1535bc3d5698SJohn Baldwin and w12,w6,w5 1536bc3d5698SJohn Baldwin add v2.4s,v2.4s,v17.4s 1537bc3d5698SJohn Baldwin bic w15,w7,w5 1538bc3d5698SJohn Baldwin eor w11,w5,w5,ror#5 1539bc3d5698SJohn Baldwin add w9,w9,w13 1540bc3d5698SJohn Baldwin ushr v18.4s,v2.4s,#17 1541bc3d5698SJohn Baldwin orr w12,w12,w15 1542bc3d5698SJohn Baldwin ushr v19.4s,v2.4s,#10 1543bc3d5698SJohn Baldwin eor w11,w11,w5,ror#19 1544bc3d5698SJohn Baldwin eor w15,w9,w9,ror#11 1545bc3d5698SJohn Baldwin sli v18.4s,v2.4s,#15 1546bc3d5698SJohn Baldwin add w8,w8,w12 1547bc3d5698SJohn Baldwin ushr v17.4s,v2.4s,#19 1548bc3d5698SJohn Baldwin ror w11,w11,#6 1549bc3d5698SJohn Baldwin eor w13,w9,w10 1550bc3d5698SJohn Baldwin eor v19.16b,v19.16b,v18.16b 1551bc3d5698SJohn Baldwin eor w15,w15,w9,ror#20 1552bc3d5698SJohn Baldwin add w8,w8,w11 1553bc3d5698SJohn Baldwin sli v17.4s,v2.4s,#13 1554bc3d5698SJohn Baldwin ldr w12,[sp,#44] 1555bc3d5698SJohn Baldwin and w14,w14,w13 1556bc3d5698SJohn Baldwin ror w15,w15,#2 1557bc3d5698SJohn Baldwin ld1 {v4.4s},[x16], #16 1558bc3d5698SJohn Baldwin add w4,w4,w8 
1559bc3d5698SJohn Baldwin eor v19.16b,v19.16b,v17.16b 1560bc3d5698SJohn Baldwin eor w14,w14,w10 1561bc3d5698SJohn Baldwin eor v17.16b,v17.16b,v17.16b 1562bc3d5698SJohn Baldwin add w7,w7,w12 1563bc3d5698SJohn Baldwin add w8,w8,w15 1564bc3d5698SJohn Baldwin and w12,w5,w4 1565bc3d5698SJohn Baldwin mov v17.d[1],v19.d[0] 1566bc3d5698SJohn Baldwin bic w15,w6,w4 1567bc3d5698SJohn Baldwin eor w11,w4,w4,ror#5 1568bc3d5698SJohn Baldwin add w8,w8,w14 1569bc3d5698SJohn Baldwin add v2.4s,v2.4s,v17.4s 1570bc3d5698SJohn Baldwin orr w12,w12,w15 1571bc3d5698SJohn Baldwin eor w11,w11,w4,ror#19 1572bc3d5698SJohn Baldwin eor w15,w8,w8,ror#11 1573bc3d5698SJohn Baldwin add v4.4s,v4.4s,v2.4s 1574bc3d5698SJohn Baldwin add w7,w7,w12 1575bc3d5698SJohn Baldwin ror w11,w11,#6 1576bc3d5698SJohn Baldwin eor w14,w8,w9 1577bc3d5698SJohn Baldwin eor w15,w15,w8,ror#20 1578bc3d5698SJohn Baldwin add w7,w7,w11 1579bc3d5698SJohn Baldwin ldr w12,[sp,#48] 1580bc3d5698SJohn Baldwin and w13,w13,w14 1581bc3d5698SJohn Baldwin ror w15,w15,#2 1582bc3d5698SJohn Baldwin add w3,w3,w7 1583bc3d5698SJohn Baldwin eor w13,w13,w9 1584bc3d5698SJohn Baldwin st1 {v4.4s},[x17], #16 1585bc3d5698SJohn Baldwin ext v4.16b,v3.16b,v0.16b,#4 1586bc3d5698SJohn Baldwin add w6,w6,w12 1587bc3d5698SJohn Baldwin add w7,w7,w15 1588bc3d5698SJohn Baldwin and w12,w4,w3 1589bc3d5698SJohn Baldwin bic w15,w5,w3 1590bc3d5698SJohn Baldwin ext v7.16b,v1.16b,v2.16b,#4 1591bc3d5698SJohn Baldwin eor w11,w3,w3,ror#5 1592bc3d5698SJohn Baldwin add w7,w7,w13 1593bc3d5698SJohn Baldwin mov d19,v2.d[1] 1594bc3d5698SJohn Baldwin orr w12,w12,w15 1595bc3d5698SJohn Baldwin eor w11,w11,w3,ror#19 1596bc3d5698SJohn Baldwin ushr v6.4s,v4.4s,#7 1597bc3d5698SJohn Baldwin eor w15,w7,w7,ror#11 1598bc3d5698SJohn Baldwin ushr v5.4s,v4.4s,#3 1599bc3d5698SJohn Baldwin add w6,w6,w12 1600bc3d5698SJohn Baldwin add v3.4s,v3.4s,v7.4s 1601bc3d5698SJohn Baldwin ror w11,w11,#6 1602bc3d5698SJohn Baldwin sli v6.4s,v4.4s,#25 1603bc3d5698SJohn Baldwin eor w13,w7,w8 1604bc3d5698SJohn 
Baldwin eor w15,w15,w7,ror#20 1605bc3d5698SJohn Baldwin ushr v7.4s,v4.4s,#18 1606bc3d5698SJohn Baldwin add w6,w6,w11 1607bc3d5698SJohn Baldwin ldr w12,[sp,#52] 1608bc3d5698SJohn Baldwin and w14,w14,w13 1609bc3d5698SJohn Baldwin eor v5.16b,v5.16b,v6.16b 1610bc3d5698SJohn Baldwin ror w15,w15,#2 1611bc3d5698SJohn Baldwin add w10,w10,w6 1612bc3d5698SJohn Baldwin sli v7.4s,v4.4s,#14 1613bc3d5698SJohn Baldwin eor w14,w14,w8 1614bc3d5698SJohn Baldwin ushr v16.4s,v19.4s,#17 1615bc3d5698SJohn Baldwin add w5,w5,w12 1616bc3d5698SJohn Baldwin add w6,w6,w15 1617bc3d5698SJohn Baldwin and w12,w3,w10 1618bc3d5698SJohn Baldwin eor v5.16b,v5.16b,v7.16b 1619bc3d5698SJohn Baldwin bic w15,w4,w10 1620bc3d5698SJohn Baldwin eor w11,w10,w10,ror#5 1621bc3d5698SJohn Baldwin sli v16.4s,v19.4s,#15 1622bc3d5698SJohn Baldwin add w6,w6,w14 1623bc3d5698SJohn Baldwin orr w12,w12,w15 1624bc3d5698SJohn Baldwin ushr v17.4s,v19.4s,#10 1625bc3d5698SJohn Baldwin eor w11,w11,w10,ror#19 1626bc3d5698SJohn Baldwin eor w15,w6,w6,ror#11 1627bc3d5698SJohn Baldwin ushr v7.4s,v19.4s,#19 1628bc3d5698SJohn Baldwin add w5,w5,w12 1629bc3d5698SJohn Baldwin ror w11,w11,#6 1630bc3d5698SJohn Baldwin add v3.4s,v3.4s,v5.4s 1631bc3d5698SJohn Baldwin eor w14,w6,w7 1632bc3d5698SJohn Baldwin eor w15,w15,w6,ror#20 1633bc3d5698SJohn Baldwin sli v7.4s,v19.4s,#13 1634bc3d5698SJohn Baldwin add w5,w5,w11 1635bc3d5698SJohn Baldwin ldr w12,[sp,#56] 1636bc3d5698SJohn Baldwin and w13,w13,w14 1637bc3d5698SJohn Baldwin eor v17.16b,v17.16b,v16.16b 1638bc3d5698SJohn Baldwin ror w15,w15,#2 1639bc3d5698SJohn Baldwin add w9,w9,w5 1640bc3d5698SJohn Baldwin eor w13,w13,w7 1641bc3d5698SJohn Baldwin eor v17.16b,v17.16b,v7.16b 1642bc3d5698SJohn Baldwin add w4,w4,w12 1643bc3d5698SJohn Baldwin add w5,w5,w15 1644bc3d5698SJohn Baldwin and w12,w10,w9 1645bc3d5698SJohn Baldwin add v3.4s,v3.4s,v17.4s 1646bc3d5698SJohn Baldwin bic w15,w3,w9 1647bc3d5698SJohn Baldwin eor w11,w9,w9,ror#5 1648bc3d5698SJohn Baldwin add w5,w5,w13 1649bc3d5698SJohn Baldwin ushr 
v18.4s,v3.4s,#17 1650bc3d5698SJohn Baldwin orr w12,w12,w15 1651bc3d5698SJohn Baldwin ushr v19.4s,v3.4s,#10 1652bc3d5698SJohn Baldwin eor w11,w11,w9,ror#19 1653bc3d5698SJohn Baldwin eor w15,w5,w5,ror#11 1654bc3d5698SJohn Baldwin sli v18.4s,v3.4s,#15 1655bc3d5698SJohn Baldwin add w4,w4,w12 1656bc3d5698SJohn Baldwin ushr v17.4s,v3.4s,#19 1657bc3d5698SJohn Baldwin ror w11,w11,#6 1658bc3d5698SJohn Baldwin eor w13,w5,w6 1659bc3d5698SJohn Baldwin eor v19.16b,v19.16b,v18.16b 1660bc3d5698SJohn Baldwin eor w15,w15,w5,ror#20 1661bc3d5698SJohn Baldwin add w4,w4,w11 1662bc3d5698SJohn Baldwin sli v17.4s,v3.4s,#13 1663bc3d5698SJohn Baldwin ldr w12,[sp,#60] 1664bc3d5698SJohn Baldwin and w14,w14,w13 1665bc3d5698SJohn Baldwin ror w15,w15,#2 1666bc3d5698SJohn Baldwin ld1 {v4.4s},[x16], #16 1667bc3d5698SJohn Baldwin add w8,w8,w4 1668bc3d5698SJohn Baldwin eor v19.16b,v19.16b,v17.16b 1669bc3d5698SJohn Baldwin eor w14,w14,w6 1670bc3d5698SJohn Baldwin eor v17.16b,v17.16b,v17.16b 1671bc3d5698SJohn Baldwin add w3,w3,w12 1672bc3d5698SJohn Baldwin add w4,w4,w15 1673bc3d5698SJohn Baldwin and w12,w9,w8 1674bc3d5698SJohn Baldwin mov v17.d[1],v19.d[0] 1675bc3d5698SJohn Baldwin bic w15,w10,w8 1676bc3d5698SJohn Baldwin eor w11,w8,w8,ror#5 1677bc3d5698SJohn Baldwin add w4,w4,w14 1678bc3d5698SJohn Baldwin add v3.4s,v3.4s,v17.4s 1679bc3d5698SJohn Baldwin orr w12,w12,w15 1680bc3d5698SJohn Baldwin eor w11,w11,w8,ror#19 1681bc3d5698SJohn Baldwin eor w15,w4,w4,ror#11 1682bc3d5698SJohn Baldwin add v4.4s,v4.4s,v3.4s 1683bc3d5698SJohn Baldwin add w3,w3,w12 1684bc3d5698SJohn Baldwin ror w11,w11,#6 1685bc3d5698SJohn Baldwin eor w14,w4,w5 1686bc3d5698SJohn Baldwin eor w15,w15,w4,ror#20 1687bc3d5698SJohn Baldwin add w3,w3,w11 1688bc3d5698SJohn Baldwin ldr w12,[x16] 1689bc3d5698SJohn Baldwin and w13,w13,w14 1690bc3d5698SJohn Baldwin ror w15,w15,#2 1691bc3d5698SJohn Baldwin add w7,w7,w3 1692bc3d5698SJohn Baldwin eor w13,w13,w5 1693bc3d5698SJohn Baldwin st1 {v4.4s},[x17], #16 1694bc3d5698SJohn Baldwin cmp w12,#0 // 
check for K256 terminator 1695bc3d5698SJohn Baldwin ldr w12,[sp,#0] 1696bc3d5698SJohn Baldwin sub x17,x17,#64 1697bc3d5698SJohn Baldwin bne .L_00_48 1698bc3d5698SJohn Baldwin 1699bc3d5698SJohn Baldwin sub x16,x16,#256 // rewind x16 1700bc3d5698SJohn Baldwin cmp x1,x2 1701bc3d5698SJohn Baldwin mov x17, #64 1702bc3d5698SJohn Baldwin csel x17, x17, xzr, eq 1703bc3d5698SJohn Baldwin sub x1,x1,x17 // avoid SEGV 1704bc3d5698SJohn Baldwin mov x17,sp 1705bc3d5698SJohn Baldwin add w10,w10,w12 1706bc3d5698SJohn Baldwin add w3,w3,w15 1707bc3d5698SJohn Baldwin and w12,w8,w7 1708bc3d5698SJohn Baldwin ld1 {v0.16b},[x1],#16 1709bc3d5698SJohn Baldwin bic w15,w9,w7 1710bc3d5698SJohn Baldwin eor w11,w7,w7,ror#5 1711bc3d5698SJohn Baldwin ld1 {v4.4s},[x16],#16 1712bc3d5698SJohn Baldwin add w3,w3,w13 1713bc3d5698SJohn Baldwin orr w12,w12,w15 1714bc3d5698SJohn Baldwin eor w11,w11,w7,ror#19 1715bc3d5698SJohn Baldwin eor w15,w3,w3,ror#11 1716bc3d5698SJohn Baldwin rev32 v0.16b,v0.16b 1717bc3d5698SJohn Baldwin add w10,w10,w12 1718bc3d5698SJohn Baldwin ror w11,w11,#6 1719bc3d5698SJohn Baldwin eor w13,w3,w4 1720bc3d5698SJohn Baldwin eor w15,w15,w3,ror#20 1721bc3d5698SJohn Baldwin add v4.4s,v4.4s,v0.4s 1722bc3d5698SJohn Baldwin add w10,w10,w11 1723bc3d5698SJohn Baldwin ldr w12,[sp,#4] 1724bc3d5698SJohn Baldwin and w14,w14,w13 1725bc3d5698SJohn Baldwin ror w15,w15,#2 1726bc3d5698SJohn Baldwin add w6,w6,w10 1727bc3d5698SJohn Baldwin eor w14,w14,w4 1728bc3d5698SJohn Baldwin add w9,w9,w12 1729bc3d5698SJohn Baldwin add w10,w10,w15 1730bc3d5698SJohn Baldwin and w12,w7,w6 1731bc3d5698SJohn Baldwin bic w15,w8,w6 1732bc3d5698SJohn Baldwin eor w11,w6,w6,ror#5 1733bc3d5698SJohn Baldwin add w10,w10,w14 1734bc3d5698SJohn Baldwin orr w12,w12,w15 1735bc3d5698SJohn Baldwin eor w11,w11,w6,ror#19 1736bc3d5698SJohn Baldwin eor w15,w10,w10,ror#11 1737bc3d5698SJohn Baldwin add w9,w9,w12 1738bc3d5698SJohn Baldwin ror w11,w11,#6 1739bc3d5698SJohn Baldwin eor w14,w10,w3 1740bc3d5698SJohn Baldwin eor 
w15,w15,w10,ror#20 1741bc3d5698SJohn Baldwin add w9,w9,w11 1742bc3d5698SJohn Baldwin ldr w12,[sp,#8] 1743bc3d5698SJohn Baldwin and w13,w13,w14 1744bc3d5698SJohn Baldwin ror w15,w15,#2 1745bc3d5698SJohn Baldwin add w5,w5,w9 1746bc3d5698SJohn Baldwin eor w13,w13,w3 1747bc3d5698SJohn Baldwin add w8,w8,w12 1748bc3d5698SJohn Baldwin add w9,w9,w15 1749bc3d5698SJohn Baldwin and w12,w6,w5 1750bc3d5698SJohn Baldwin bic w15,w7,w5 1751bc3d5698SJohn Baldwin eor w11,w5,w5,ror#5 1752bc3d5698SJohn Baldwin add w9,w9,w13 1753bc3d5698SJohn Baldwin orr w12,w12,w15 1754bc3d5698SJohn Baldwin eor w11,w11,w5,ror#19 1755bc3d5698SJohn Baldwin eor w15,w9,w9,ror#11 1756bc3d5698SJohn Baldwin add w8,w8,w12 1757bc3d5698SJohn Baldwin ror w11,w11,#6 1758bc3d5698SJohn Baldwin eor w13,w9,w10 1759bc3d5698SJohn Baldwin eor w15,w15,w9,ror#20 1760bc3d5698SJohn Baldwin add w8,w8,w11 1761bc3d5698SJohn Baldwin ldr w12,[sp,#12] 1762bc3d5698SJohn Baldwin and w14,w14,w13 1763bc3d5698SJohn Baldwin ror w15,w15,#2 1764bc3d5698SJohn Baldwin add w4,w4,w8 1765bc3d5698SJohn Baldwin eor w14,w14,w10 1766bc3d5698SJohn Baldwin add w7,w7,w12 1767bc3d5698SJohn Baldwin add w8,w8,w15 1768bc3d5698SJohn Baldwin and w12,w5,w4 1769bc3d5698SJohn Baldwin bic w15,w6,w4 1770bc3d5698SJohn Baldwin eor w11,w4,w4,ror#5 1771bc3d5698SJohn Baldwin add w8,w8,w14 1772bc3d5698SJohn Baldwin orr w12,w12,w15 1773bc3d5698SJohn Baldwin eor w11,w11,w4,ror#19 1774bc3d5698SJohn Baldwin eor w15,w8,w8,ror#11 1775bc3d5698SJohn Baldwin add w7,w7,w12 1776bc3d5698SJohn Baldwin ror w11,w11,#6 1777bc3d5698SJohn Baldwin eor w14,w8,w9 1778bc3d5698SJohn Baldwin eor w15,w15,w8,ror#20 1779bc3d5698SJohn Baldwin add w7,w7,w11 1780bc3d5698SJohn Baldwin ldr w12,[sp,#16] 1781bc3d5698SJohn Baldwin and w13,w13,w14 1782bc3d5698SJohn Baldwin ror w15,w15,#2 1783bc3d5698SJohn Baldwin add w3,w3,w7 1784bc3d5698SJohn Baldwin eor w13,w13,w9 1785bc3d5698SJohn Baldwin st1 {v4.4s},[x17], #16 1786bc3d5698SJohn Baldwin add w6,w6,w12 1787bc3d5698SJohn Baldwin add w7,w7,w15 
1788bc3d5698SJohn Baldwin and w12,w4,w3 1789bc3d5698SJohn Baldwin ld1 {v1.16b},[x1],#16 1790bc3d5698SJohn Baldwin bic w15,w5,w3 1791bc3d5698SJohn Baldwin eor w11,w3,w3,ror#5 1792bc3d5698SJohn Baldwin ld1 {v4.4s},[x16],#16 1793bc3d5698SJohn Baldwin add w7,w7,w13 1794bc3d5698SJohn Baldwin orr w12,w12,w15 1795bc3d5698SJohn Baldwin eor w11,w11,w3,ror#19 1796bc3d5698SJohn Baldwin eor w15,w7,w7,ror#11 1797bc3d5698SJohn Baldwin rev32 v1.16b,v1.16b 1798bc3d5698SJohn Baldwin add w6,w6,w12 1799bc3d5698SJohn Baldwin ror w11,w11,#6 1800bc3d5698SJohn Baldwin eor w13,w7,w8 1801bc3d5698SJohn Baldwin eor w15,w15,w7,ror#20 1802bc3d5698SJohn Baldwin add v4.4s,v4.4s,v1.4s 1803bc3d5698SJohn Baldwin add w6,w6,w11 1804bc3d5698SJohn Baldwin ldr w12,[sp,#20] 1805bc3d5698SJohn Baldwin and w14,w14,w13 1806bc3d5698SJohn Baldwin ror w15,w15,#2 1807bc3d5698SJohn Baldwin add w10,w10,w6 1808bc3d5698SJohn Baldwin eor w14,w14,w8 1809bc3d5698SJohn Baldwin add w5,w5,w12 1810bc3d5698SJohn Baldwin add w6,w6,w15 1811bc3d5698SJohn Baldwin and w12,w3,w10 1812bc3d5698SJohn Baldwin bic w15,w4,w10 1813bc3d5698SJohn Baldwin eor w11,w10,w10,ror#5 1814bc3d5698SJohn Baldwin add w6,w6,w14 1815bc3d5698SJohn Baldwin orr w12,w12,w15 1816bc3d5698SJohn Baldwin eor w11,w11,w10,ror#19 1817bc3d5698SJohn Baldwin eor w15,w6,w6,ror#11 1818bc3d5698SJohn Baldwin add w5,w5,w12 1819bc3d5698SJohn Baldwin ror w11,w11,#6 1820bc3d5698SJohn Baldwin eor w14,w6,w7 1821bc3d5698SJohn Baldwin eor w15,w15,w6,ror#20 1822bc3d5698SJohn Baldwin add w5,w5,w11 1823bc3d5698SJohn Baldwin ldr w12,[sp,#24] 1824bc3d5698SJohn Baldwin and w13,w13,w14 1825bc3d5698SJohn Baldwin ror w15,w15,#2 1826bc3d5698SJohn Baldwin add w9,w9,w5 1827bc3d5698SJohn Baldwin eor w13,w13,w7 1828bc3d5698SJohn Baldwin add w4,w4,w12 1829bc3d5698SJohn Baldwin add w5,w5,w15 1830bc3d5698SJohn Baldwin and w12,w10,w9 1831bc3d5698SJohn Baldwin bic w15,w3,w9 1832bc3d5698SJohn Baldwin eor w11,w9,w9,ror#5 1833bc3d5698SJohn Baldwin add w5,w5,w13 1834bc3d5698SJohn Baldwin orr 
w12,w12,w15 1835bc3d5698SJohn Baldwin eor w11,w11,w9,ror#19 1836bc3d5698SJohn Baldwin eor w15,w5,w5,ror#11 1837bc3d5698SJohn Baldwin add w4,w4,w12 1838bc3d5698SJohn Baldwin ror w11,w11,#6 1839bc3d5698SJohn Baldwin eor w13,w5,w6 1840bc3d5698SJohn Baldwin eor w15,w15,w5,ror#20 1841bc3d5698SJohn Baldwin add w4,w4,w11 1842bc3d5698SJohn Baldwin ldr w12,[sp,#28] 1843bc3d5698SJohn Baldwin and w14,w14,w13 1844bc3d5698SJohn Baldwin ror w15,w15,#2 1845bc3d5698SJohn Baldwin add w8,w8,w4 1846bc3d5698SJohn Baldwin eor w14,w14,w6 1847bc3d5698SJohn Baldwin add w3,w3,w12 1848bc3d5698SJohn Baldwin add w4,w4,w15 1849bc3d5698SJohn Baldwin and w12,w9,w8 1850bc3d5698SJohn Baldwin bic w15,w10,w8 1851bc3d5698SJohn Baldwin eor w11,w8,w8,ror#5 1852bc3d5698SJohn Baldwin add w4,w4,w14 1853bc3d5698SJohn Baldwin orr w12,w12,w15 1854bc3d5698SJohn Baldwin eor w11,w11,w8,ror#19 1855bc3d5698SJohn Baldwin eor w15,w4,w4,ror#11 1856bc3d5698SJohn Baldwin add w3,w3,w12 1857bc3d5698SJohn Baldwin ror w11,w11,#6 1858bc3d5698SJohn Baldwin eor w14,w4,w5 1859bc3d5698SJohn Baldwin eor w15,w15,w4,ror#20 1860bc3d5698SJohn Baldwin add w3,w3,w11 1861bc3d5698SJohn Baldwin ldr w12,[sp,#32] 1862bc3d5698SJohn Baldwin and w13,w13,w14 1863bc3d5698SJohn Baldwin ror w15,w15,#2 1864bc3d5698SJohn Baldwin add w7,w7,w3 1865bc3d5698SJohn Baldwin eor w13,w13,w5 1866bc3d5698SJohn Baldwin st1 {v4.4s},[x17], #16 1867bc3d5698SJohn Baldwin add w10,w10,w12 1868bc3d5698SJohn Baldwin add w3,w3,w15 1869bc3d5698SJohn Baldwin and w12,w8,w7 1870bc3d5698SJohn Baldwin ld1 {v2.16b},[x1],#16 1871bc3d5698SJohn Baldwin bic w15,w9,w7 1872bc3d5698SJohn Baldwin eor w11,w7,w7,ror#5 1873bc3d5698SJohn Baldwin ld1 {v4.4s},[x16],#16 1874bc3d5698SJohn Baldwin add w3,w3,w13 1875bc3d5698SJohn Baldwin orr w12,w12,w15 1876bc3d5698SJohn Baldwin eor w11,w11,w7,ror#19 1877bc3d5698SJohn Baldwin eor w15,w3,w3,ror#11 1878bc3d5698SJohn Baldwin rev32 v2.16b,v2.16b 1879bc3d5698SJohn Baldwin add w10,w10,w12 1880bc3d5698SJohn Baldwin ror w11,w11,#6 1881bc3d5698SJohn 
Baldwin eor w13,w3,w4 1882bc3d5698SJohn Baldwin eor w15,w15,w3,ror#20 1883bc3d5698SJohn Baldwin add v4.4s,v4.4s,v2.4s 1884bc3d5698SJohn Baldwin add w10,w10,w11 1885bc3d5698SJohn Baldwin ldr w12,[sp,#36] 1886bc3d5698SJohn Baldwin and w14,w14,w13 1887bc3d5698SJohn Baldwin ror w15,w15,#2 1888bc3d5698SJohn Baldwin add w6,w6,w10 1889bc3d5698SJohn Baldwin eor w14,w14,w4 1890bc3d5698SJohn Baldwin add w9,w9,w12 1891bc3d5698SJohn Baldwin add w10,w10,w15 1892bc3d5698SJohn Baldwin and w12,w7,w6 1893bc3d5698SJohn Baldwin bic w15,w8,w6 1894bc3d5698SJohn Baldwin eor w11,w6,w6,ror#5 1895bc3d5698SJohn Baldwin add w10,w10,w14 1896bc3d5698SJohn Baldwin orr w12,w12,w15 1897bc3d5698SJohn Baldwin eor w11,w11,w6,ror#19 1898bc3d5698SJohn Baldwin eor w15,w10,w10,ror#11 1899bc3d5698SJohn Baldwin add w9,w9,w12 1900bc3d5698SJohn Baldwin ror w11,w11,#6 1901bc3d5698SJohn Baldwin eor w14,w10,w3 1902bc3d5698SJohn Baldwin eor w15,w15,w10,ror#20 1903bc3d5698SJohn Baldwin add w9,w9,w11 1904bc3d5698SJohn Baldwin ldr w12,[sp,#40] 1905bc3d5698SJohn Baldwin and w13,w13,w14 1906bc3d5698SJohn Baldwin ror w15,w15,#2 1907bc3d5698SJohn Baldwin add w5,w5,w9 1908bc3d5698SJohn Baldwin eor w13,w13,w3 1909bc3d5698SJohn Baldwin add w8,w8,w12 1910bc3d5698SJohn Baldwin add w9,w9,w15 1911bc3d5698SJohn Baldwin and w12,w6,w5 1912bc3d5698SJohn Baldwin bic w15,w7,w5 1913bc3d5698SJohn Baldwin eor w11,w5,w5,ror#5 1914bc3d5698SJohn Baldwin add w9,w9,w13 1915bc3d5698SJohn Baldwin orr w12,w12,w15 1916bc3d5698SJohn Baldwin eor w11,w11,w5,ror#19 1917bc3d5698SJohn Baldwin eor w15,w9,w9,ror#11 1918bc3d5698SJohn Baldwin add w8,w8,w12 1919bc3d5698SJohn Baldwin ror w11,w11,#6 1920bc3d5698SJohn Baldwin eor w13,w9,w10 1921bc3d5698SJohn Baldwin eor w15,w15,w9,ror#20 1922bc3d5698SJohn Baldwin add w8,w8,w11 1923bc3d5698SJohn Baldwin ldr w12,[sp,#44] 1924bc3d5698SJohn Baldwin and w14,w14,w13 1925bc3d5698SJohn Baldwin ror w15,w15,#2 1926bc3d5698SJohn Baldwin add w4,w4,w8 1927bc3d5698SJohn Baldwin eor w14,w14,w10 1928bc3d5698SJohn Baldwin 
add w7,w7,w12 1929bc3d5698SJohn Baldwin add w8,w8,w15 1930bc3d5698SJohn Baldwin and w12,w5,w4 1931bc3d5698SJohn Baldwin bic w15,w6,w4 1932bc3d5698SJohn Baldwin eor w11,w4,w4,ror#5 1933bc3d5698SJohn Baldwin add w8,w8,w14 1934bc3d5698SJohn Baldwin orr w12,w12,w15 1935bc3d5698SJohn Baldwin eor w11,w11,w4,ror#19 1936bc3d5698SJohn Baldwin eor w15,w8,w8,ror#11 1937bc3d5698SJohn Baldwin add w7,w7,w12 1938bc3d5698SJohn Baldwin ror w11,w11,#6 1939bc3d5698SJohn Baldwin eor w14,w8,w9 1940bc3d5698SJohn Baldwin eor w15,w15,w8,ror#20 1941bc3d5698SJohn Baldwin add w7,w7,w11 1942bc3d5698SJohn Baldwin ldr w12,[sp,#48] 1943bc3d5698SJohn Baldwin and w13,w13,w14 1944bc3d5698SJohn Baldwin ror w15,w15,#2 1945bc3d5698SJohn Baldwin add w3,w3,w7 1946bc3d5698SJohn Baldwin eor w13,w13,w9 1947bc3d5698SJohn Baldwin st1 {v4.4s},[x17], #16 1948bc3d5698SJohn Baldwin add w6,w6,w12 1949bc3d5698SJohn Baldwin add w7,w7,w15 1950bc3d5698SJohn Baldwin and w12,w4,w3 1951bc3d5698SJohn Baldwin ld1 {v3.16b},[x1],#16 1952bc3d5698SJohn Baldwin bic w15,w5,w3 1953bc3d5698SJohn Baldwin eor w11,w3,w3,ror#5 1954bc3d5698SJohn Baldwin ld1 {v4.4s},[x16],#16 1955bc3d5698SJohn Baldwin add w7,w7,w13 1956bc3d5698SJohn Baldwin orr w12,w12,w15 1957bc3d5698SJohn Baldwin eor w11,w11,w3,ror#19 1958bc3d5698SJohn Baldwin eor w15,w7,w7,ror#11 1959bc3d5698SJohn Baldwin rev32 v3.16b,v3.16b 1960bc3d5698SJohn Baldwin add w6,w6,w12 1961bc3d5698SJohn Baldwin ror w11,w11,#6 1962bc3d5698SJohn Baldwin eor w13,w7,w8 1963bc3d5698SJohn Baldwin eor w15,w15,w7,ror#20 1964bc3d5698SJohn Baldwin add v4.4s,v4.4s,v3.4s 1965bc3d5698SJohn Baldwin add w6,w6,w11 1966bc3d5698SJohn Baldwin ldr w12,[sp,#52] 1967bc3d5698SJohn Baldwin and w14,w14,w13 1968bc3d5698SJohn Baldwin ror w15,w15,#2 1969bc3d5698SJohn Baldwin add w10,w10,w6 1970bc3d5698SJohn Baldwin eor w14,w14,w8 1971bc3d5698SJohn Baldwin add w5,w5,w12 1972bc3d5698SJohn Baldwin add w6,w6,w15 1973bc3d5698SJohn Baldwin and w12,w3,w10 1974bc3d5698SJohn Baldwin bic w15,w4,w10 1975bc3d5698SJohn Baldwin 
eor w11,w10,w10,ror#5 1976bc3d5698SJohn Baldwin add w6,w6,w14 1977bc3d5698SJohn Baldwin orr w12,w12,w15 1978bc3d5698SJohn Baldwin eor w11,w11,w10,ror#19 1979bc3d5698SJohn Baldwin eor w15,w6,w6,ror#11 1980bc3d5698SJohn Baldwin add w5,w5,w12 1981bc3d5698SJohn Baldwin ror w11,w11,#6 1982bc3d5698SJohn Baldwin eor w14,w6,w7 1983bc3d5698SJohn Baldwin eor w15,w15,w6,ror#20 1984bc3d5698SJohn Baldwin add w5,w5,w11 1985bc3d5698SJohn Baldwin ldr w12,[sp,#56] 1986bc3d5698SJohn Baldwin and w13,w13,w14 1987bc3d5698SJohn Baldwin ror w15,w15,#2 1988bc3d5698SJohn Baldwin add w9,w9,w5 1989bc3d5698SJohn Baldwin eor w13,w13,w7 1990bc3d5698SJohn Baldwin add w4,w4,w12 1991bc3d5698SJohn Baldwin add w5,w5,w15 1992bc3d5698SJohn Baldwin and w12,w10,w9 1993bc3d5698SJohn Baldwin bic w15,w3,w9 1994bc3d5698SJohn Baldwin eor w11,w9,w9,ror#5 1995bc3d5698SJohn Baldwin add w5,w5,w13 1996bc3d5698SJohn Baldwin orr w12,w12,w15 1997bc3d5698SJohn Baldwin eor w11,w11,w9,ror#19 1998bc3d5698SJohn Baldwin eor w15,w5,w5,ror#11 1999bc3d5698SJohn Baldwin add w4,w4,w12 2000bc3d5698SJohn Baldwin ror w11,w11,#6 2001bc3d5698SJohn Baldwin eor w13,w5,w6 2002bc3d5698SJohn Baldwin eor w15,w15,w5,ror#20 2003bc3d5698SJohn Baldwin add w4,w4,w11 2004bc3d5698SJohn Baldwin ldr w12,[sp,#60] 2005bc3d5698SJohn Baldwin and w14,w14,w13 2006bc3d5698SJohn Baldwin ror w15,w15,#2 2007bc3d5698SJohn Baldwin add w8,w8,w4 2008bc3d5698SJohn Baldwin eor w14,w14,w6 2009bc3d5698SJohn Baldwin add w3,w3,w12 2010bc3d5698SJohn Baldwin add w4,w4,w15 2011bc3d5698SJohn Baldwin and w12,w9,w8 2012bc3d5698SJohn Baldwin bic w15,w10,w8 2013bc3d5698SJohn Baldwin eor w11,w8,w8,ror#5 2014bc3d5698SJohn Baldwin add w4,w4,w14 2015bc3d5698SJohn Baldwin orr w12,w12,w15 2016bc3d5698SJohn Baldwin eor w11,w11,w8,ror#19 2017bc3d5698SJohn Baldwin eor w15,w4,w4,ror#11 2018bc3d5698SJohn Baldwin add w3,w3,w12 2019bc3d5698SJohn Baldwin ror w11,w11,#6 2020bc3d5698SJohn Baldwin eor w14,w4,w5 2021bc3d5698SJohn Baldwin eor w15,w15,w4,ror#20 2022bc3d5698SJohn Baldwin add 
w3,w3,w11 2023bc3d5698SJohn Baldwin and w13,w13,w14 2024bc3d5698SJohn Baldwin ror w15,w15,#2 2025bc3d5698SJohn Baldwin add w7,w7,w3 2026bc3d5698SJohn Baldwin eor w13,w13,w5 2027bc3d5698SJohn Baldwin st1 {v4.4s},[x17], #16 2028bc3d5698SJohn Baldwin add w3,w3,w15 // h+=Sigma0(a) from the past 2029bc3d5698SJohn Baldwin ldp w11,w12,[x0,#0] 2030bc3d5698SJohn Baldwin add w3,w3,w13 // h+=Maj(a,b,c) from the past 2031bc3d5698SJohn Baldwin ldp w13,w14,[x0,#8] 2032bc3d5698SJohn Baldwin add w3,w3,w11 // accumulate 2033bc3d5698SJohn Baldwin add w4,w4,w12 2034bc3d5698SJohn Baldwin ldp w11,w12,[x0,#16] 2035bc3d5698SJohn Baldwin add w5,w5,w13 2036bc3d5698SJohn Baldwin add w6,w6,w14 2037bc3d5698SJohn Baldwin ldp w13,w14,[x0,#24] 2038bc3d5698SJohn Baldwin add w7,w7,w11 2039bc3d5698SJohn Baldwin add w8,w8,w12 2040bc3d5698SJohn Baldwin ldr w12,[sp,#0] 2041bc3d5698SJohn Baldwin stp w3,w4,[x0,#0] 2042bc3d5698SJohn Baldwin add w9,w9,w13 2043bc3d5698SJohn Baldwin mov w13,wzr 2044bc3d5698SJohn Baldwin stp w5,w6,[x0,#8] 2045bc3d5698SJohn Baldwin add w10,w10,w14 2046bc3d5698SJohn Baldwin stp w7,w8,[x0,#16] 2047bc3d5698SJohn Baldwin eor w14,w4,w5 2048bc3d5698SJohn Baldwin stp w9,w10,[x0,#24] 2049bc3d5698SJohn Baldwin mov w15,wzr 2050bc3d5698SJohn Baldwin mov x17,sp 2051bc3d5698SJohn Baldwin b.ne .L_00_48 2052bc3d5698SJohn Baldwin 2053bc3d5698SJohn Baldwin ldr x29,[x29] 2054bc3d5698SJohn Baldwin add sp,sp,#16*4+16 2055bc3d5698SJohn Baldwin ret 2056bc3d5698SJohn Baldwin.size sha256_block_neon,.-sha256_block_neon 2057