// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

// Original code can be found at the link below:
// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl

// Some function names were changed to be consistent with Go function
// names. For instance, function aes_p8_set_{en,de}crypt_key become
// set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm in two parts
// and a new section was created (doEncryptKeyAsm). This was necessary to
// avoid overwriting the arguments when setDecryptKeyAsm calls
// setEncryptKeyAsm. There were other modifications as well, but the
// functionality was kept the same.
#include "textflag.h"

// NOTE: the trailing comment on most instructions shows what appears to be
// the corresponding instruction of the original (non-Go) assembly, using raw
// register numbers. Where such a comment names register 0 as a general
// purpose destination (e.g. "andi. 0,4,0x3f", "lwz 0, 0(3)"), this file uses
// TEMP (R19) instead.

// For set{En,De}cryptKeyAsm
//   INP    - pointer to the user key (later reused as a scratch pointer)
//   BITS   - key length in bits
//   OUT    - pointer into the expanded key being written
//   PTR    - pending exit code during argument checks, then pointer into rcon
//   CNT    - loop counter value copied into CTR
//   ROUNDS - number of rounds (10, 12 or 14), stored after the expanded key
//   TEMP   - scratch
#define INP R3
#define BITS R4
#define OUT R5
#define PTR R6
#define CNT R7
#define ROUNDS R8
#define TEMP R19
#define ZERO V0
#define IN0 V1
#define IN1 V2
#define KEY V3
#define RCON V4
#define MASK V5
#define TMP V6
#define STAGE V7
#define OUTPERM V8
#define OUTMASK V9
#define OUTHEAD V10
#define OUTTAIL V11

// For {en,de}cryptBlockAsm
// Note that these alias the same general purpose registers as the key setup
// names above (R3..R7); the two sets are never live in the same function.
#define BLK_INP R3
#define BLK_OUT R4
#define BLK_KEY R5
#define BLK_ROUNDS R6
#define BLK_IDX R7

// rcon is a 64-byte read-only constant table used by doEncryptKeyAsm:
//   0x00..0x0f  initial round constant (loaded into RCON before the loops)
//   0x10..0x1f  second round constant (loaded into RCON after loop128)
//   0x20..0x2f  permute mask (loaded into MASK)
//   0x30..0x3f  zero
DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x30(SB)/8, $0x0000000000000000
DATA ·rcon+0x38(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $64

// func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
//
// Loads the three arguments into INP/BITS/OUT and jumps (not calls) to
// doEncryptKeyAsm, which writes the result to ret+24(FP) and returns to the
// caller of this function.
TEXT ·setEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
	// Load the arguments inside the registers
	MOVD key+0(FP), INP
	MOVD keylen+8(FP), BITS
	MOVD enc+16(FP), OUT
	JMP ·doEncryptKeyAsm(SB)

// This text is used by both setEncryptKeyAsm and setDecryptKeyAsm
//
// Inputs (in registers, not on the stack):
//   INP  (R3) - key pointer
//   BITS (R4) - key length in bits
//   OUT  (R5) - destination for the expanded key
//
// Result, left in R3 and also stored to ret+24(FP):
//    0  success
//   -1  INP or OUT is zero
//   -2  BITS is below 128, above 256, or not a multiple of 64
//
// On success the round keys are written starting at the original OUT, the
// round count (10, 12 or 14) is stored as a 32-bit word at offset 240 from
// the original OUT, OUT is left pointing at that word, and ROUNDS (R8)
// still holds the round count. setDecryptKeyAsm relies on both of these.
TEXT ·doEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
	// Do not change R10 since it's storing the LR value in setDecryptKeyAsm

	// Check arguments
	MOVD $-1, PTR // li 6,-1 exit code to -1 (255)
	CMPU INP, $0 // cmpldi r3,0 input key pointer set?
	BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
	CMPU OUT, $0 // cmpldi r5,0 output key pointer set?
	BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
	MOVD $-2, PTR // li 6,-2 exit code to -2 (254)
	CMPW BITS, $128 // cmpwi 4,128 greater or equal to 128
	BC 0x0E, 0, enc_key_abort // blt- .Lenc_key_abort
	CMPW BITS, $256 // cmpwi 4,256 less than or equal to 256
	BC 0x0E, 1, enc_key_abort // bgt- .Lenc_key_abort
	ANDCC $0x3f, BITS, TEMP // andi. 0,4,0x3f multiple of 64
	BC 0x06, 2, enc_key_abort // bne- .Lenc_key_abort

	MOVD $·rcon(SB), PTR // PTR points to rcon addr

	// Get key from memory and write aligned into VR
	NEG INP, R9 // neg 9,3 R9 is ~INP + 1
	LVX (INP)(R0), IN0 // lvx 1,0,3 Load key inside IN0
	ADD $15, INP, INP // addi 3,3,15 Add 15B to INP addr
	LVSR (R9)(R0), KEY // lvsr 3,0,9
	MOVD $0x20, R8 // li 8,0x20 R8 = 32
	// The result of this compare is consumed by the BLT/BEQ further down;
	// nothing in between writes the condition register.
	CMPW BITS, $192 // cmpwi 4,192 Key size == 192?
	LVX (INP)(R0), IN1 // lvx 2,0,3
	VSPLTISB $0x0f, MASK // vspltisb 5,0x0f 0x0f0f0f0f... mask
	LVX (PTR)(R0), RCON // lvx 4,0,6 Load first 16 bytes into RCON
	VXOR KEY, MASK, KEY // vxor 3,3,5 Adjust for byte swap
	LVX (PTR)(R8), MASK // lvx 5,8,6 Load rcon+0x20 (MASK) into MASK
	ADD $0x10, PTR, PTR // addi 6,6,0x10 PTR to next 16 bytes of RCON
	VPERM IN0, IN1, KEY, IN0 // vperm 1,1,2,3 Align
	MOVD $8, CNT // li 7,8 CNT = 8
	VXOR ZERO, ZERO, ZERO // vxor 0,0,0 Zero to be zero :)
	MOVD CNT, CTR // mtctr 7 Set the counter to 8 (rounds)

	// Prepare the permute/select vectors used for every store to OUT.
	// Each round key is rotated with OUTPERM, merged with the previous tail
	// (OUTHEAD) under OUTMASK, and stored; the leftover becomes the new
	// OUTHEAD.
	LVSL (OUT)(R0), OUTPERM // lvsl 8,0,5
	VSPLTISB $-1, OUTMASK // vspltisb 9,-1
	LVX (OUT)(R0), OUTHEAD // lvx 10,0,5
	VPERM OUTMASK, ZERO, OUTPERM, OUTMASK // vperm 9,9,0,8

	// Dispatch on the CMPW BITS, $192 above: <192 -> 128-bit path,
	// ==192 -> 192-bit path, otherwise 256-bit path.
	BLT loop128 // blt .Loop128
	ADD $8, INP, INP // addi 3,3,8
	BEQ l192 // beq .L192
	ADD $8, INP, INP // addi 3,3,8
	JMP l256 // b .L256

loop128:
	// Key schedule (Round 1 to 8)
	VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0) // stvx 7,0,5 Write to output
	ADD $16, OUT, OUT // addi 5,5,16 Point to the next round

	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 Double the round constant
	VXOR IN0, KEY, IN0 // vxor 1,1,3
	BC 0x10, 0, loop128 // bdnz .Loop128

	// PTR was advanced by 0x10 above, so this loads rcon+0x10.
	LVX (PTR)(R0), RCON // lvx 4,0,6 Last two round keys

	// Key schedule (Round 9)
	VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 9
	ADD $16, OUT, OUT // addi 5,5,16

	// Key schedule (Round 10)
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
	VXOR IN0, KEY, IN0 // vxor 1,1,3

	VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 10
	ADD $16, OUT, OUT // addi 5,5,16

	// Key schedule (Round 11)
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VXOR IN0, KEY, IN0 // vxor 1,1,3
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 11

	// INP now addresses the tail quadword that "done" must patch.
	// OUT has advanced 0xA0 so far; +0x50 brings it to offset 240 (0xF0),
	// where "done" stores the round count.
	ADD $15, OUT, INP // addi 3,5,15
	ADD $0x50, OUT, OUT // addi 5,5,0x50

	MOVD $10, ROUNDS // li 8,10
	JMP done // b .Ldone

l192:
	// 192-bit key: load the remaining key bytes, store the first round key,
	// and set up 4 loop iterations (each iteration stores three quadwords).
	LVX (INP)(R0), TMP // lvx 6,0,3
	MOVD $4, CNT // li 7,4
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $16, OUT, OUT // addi 5,5,16
	VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3
	VSPLTISB $8, KEY // vspltisb 3,8
	MOVD CNT, CTR // mtctr 7
	VSUBUBM MASK, KEY, MASK // vsububm 5,5,3 Subtract 8 from each MASK byte

loop192:
	VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4

	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6

	VSLDOI $8, ZERO, IN1, STAGE // vsldoi 7,0,2,8
	VSPLTW $3, IN0, TMP // vspltw 6,1,3
	VXOR TMP, IN1, TMP // vxor 6,6,2
	VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
	VXOR IN1, TMP, IN1 // vxor 2,2,6
	VXOR IN0, KEY, IN0 // vxor 1,1,3
	VXOR IN1, KEY, IN1 // vxor 2,2,3
	VSLDOI $8, STAGE, IN0, STAGE // vsldoi 7,7,1,8

	VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $16, OUT, OUT // addi 5,5,16

	VSLDOI $8, IN0, IN1, STAGE // vsldoi 7,1,2,8
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $16, OUT, OUT // addi 5,5,16

	VSPLTW $3, IN0, TMP // vspltw 6,1,3
	VXOR TMP, IN1, TMP // vxor 6,6,2
	VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
	VXOR IN1, TMP, IN1 // vxor 2,2,6
	VXOR IN0, KEY, IN0 // vxor 1,1,3
	VXOR IN1, KEY, IN1 // vxor 2,2,3
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	// INP tracks the tail quadword for "done"; recomputed every iteration
	// so it is correct when the loop exits.
	ADD $15, OUT, INP // addi 3,5,15
	ADD $16, OUT, OUT // addi 5,5,16
	BC 0x10, 0, loop192 // bdnz .Loop192

	// OUT has advanced 16 + 4*48 = 208 bytes; +0x20 brings it to offset 240.
	MOVD $12, ROUNDS // li 8,12
	ADD $0x20, OUT, OUT // addi 5,5,0x20
	BR done // b .Ldone

l256:
	// 256-bit key: load the second half of the key, store the first round
	// key, and set up 7 loop iterations (each stores two quadwords; the
	// loop exits through the bdz in the middle of the 7th pass).
	LVX (INP)(R0), TMP // lvx 6,0,3
	MOVD $7, CNT // li 7,7
	MOVD $14, ROUNDS // li 8,14
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $16, OUT, OUT // addi 5,5,16
	VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3
	MOVD CNT, CTR // mtctr 7

loop256:
	VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VPERM IN1, IN1, OUTPERM, OUTTAIL // vperm 11,2,2,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $16, OUT, OUT // addi 5,5,16

	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
	VXOR IN0, KEY, IN0 // vxor 1,1,3
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $15, OUT, INP // addi 3,5,15
	ADD $16, OUT, OUT // addi 5,5,16
	// Decrement CTR and leave once it reaches zero; at that point OUT has
	// advanced 16 + 7*32 = 240 bytes.
	BC 0x12, 0, done // bdz .Ldone

	VSPLTW $3, IN0, KEY // vspltw 3,1,3
	VSLDOI $12, ZERO, IN1, TMP // vsldoi 6,0,2,12
	VSBOX KEY, KEY // vsbox 3,3

	VXOR IN1, TMP, IN1 // vxor 2,2,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN1, TMP, IN1 // vxor 2,2,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN1, TMP, IN1 // vxor 2,2,6

	VXOR IN1, KEY, IN1 // vxor 2,2,3
	JMP loop256 // b .Loop256

done:
	// Flush the pending tail: merge OUTHEAD with the quadword currently at
	// INP (set to "OUT+15" just before OUT was last advanced) under OUTMASK
	// and write it back, then store the round count at OUT.
	LVX (INP)(R0), IN1 // lvx 2,0,3
	VSEL OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9
	STVX IN1, (INP+R0) // stvx 2,0,3
	MOVD $0, PTR // li 6,0 set PTR to 0 (exit code 0)
	MOVW ROUNDS, 0(OUT) // stw 8,0(5)

enc_key_abort:
	// Common exit: PTR holds the exit code on every path that reaches here.
	MOVD PTR, INP // mr 3,6 set exit code with PTR value
	MOVD INP, ret+24(FP) // Put return value into the FP
	RET // blr

// func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
//
// Builds the encryption key schedule at dec via doEncryptKeyAsm and, if that
// succeeded, reverses the order of the 16-byte round keys in place so the
// schedule can be walked forwards for decryption. Returns 0 on success,
// otherwise the non-zero code produced by doEncryptKeyAsm.
TEXT ·setDecryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
	// Load the arguments inside the registers
	MOVD key+0(FP), INP
	MOVD keylen+8(FP), BITS
	MOVD dec+16(FP), OUT

	// The CALL overwrites LR, so keep the caller's return address in R10
	// (doEncryptKeyAsm is written not to touch R10).
	MOVD LR, R10 // mflr 10
	CALL ·doEncryptKeyAsm(SB)
	MOVD R10, LR // mtlr 10

	CMPW INP, $0 // cmpwi 3,0 exit 0 = ok
	BC 0x06, 2, dec_key_abort // bne- .Ldec_key_abort

	// doEncryptKeyAsm set ROUNDS (R8) with the proper value for each mode
	// and left OUT at offset 240 from the start of the schedule.
	SLW $4, ROUNDS, CNT // slwi 7,8,4 CNT = ROUNDS * 16
	SUB $240, OUT, INP // subi 3,5,240 INP = first round key
	SRW $1, ROUNDS, ROUNDS // srwi 8,8,1 ROUNDS / 2 swaps needed
	ADD R7, INP, OUT // add 5,3,7 OUT = last round key (R7 is CNT)
	MOVD ROUNDS, CTR // mtctr 8

	// dec_key will invert the key sequence in order to be used for decrypt
	// Each iteration swaps the 16 bytes at INP with the 16 bytes at OUT,
	// then moves INP forward and OUT backward by 16. R10 is free to be
	// clobbered here because LR has already been restored.
dec_key:
	MOVWZ 0(INP), TEMP // lwz 0, 0(3)
	MOVWZ 4(INP), R6 // lwz 6, 4(3)
	MOVWZ 8(INP), R7 // lwz 7, 8(3)
	MOVWZ 12(INP), R8 // lwz 8, 12(3)
	ADD $16, INP, INP // addi 3,3,16
	MOVWZ 0(OUT), R9 // lwz 9, 0(5)
	MOVWZ 4(OUT), R10 // lwz 10,4(5)
	MOVWZ 8(OUT), R11 // lwz 11,8(5)
	MOVWZ 12(OUT), R12 // lwz 12,12(5)
	MOVW TEMP, 0(OUT) // stw 0, 0(5)
	MOVW R6, 4(OUT) // stw 6, 4(5)
	MOVW R7, 8(OUT) // stw 7, 8(5)
	MOVW R8, 12(OUT) // stw 8, 12(5)
	SUB $16, OUT, OUT // subi 5,5,16
	MOVW R9, -16(INP) // stw 9, -16(3)
	MOVW R10, -12(INP) // stw 10,-12(3)
	MOVW R11, -8(INP) // stw 11,-8(3)
	MOVW R12, -4(INP) // stw 12,-4(3)
	BC 0x10, 0, dec_key // bdnz .Ldeckey

	// R3 was used as the INP pointer above, so zero it for the return value.
	XOR R3, R3, R3 // xor 3,3,3 Clean R3

dec_key_abort:
	MOVD R3, ret+24(FP) // Put return value into the FP
	RET // blr

// func encryptBlockAsm(dst, src *byte, enc *uint32)
//
// Encrypts the 16 bytes at src with the expanded key at enc and writes the
// result to dst. The round count is read as a 32-bit word at offset 240 from
// enc. The source is read with two LVX loads (offsets 0 and 15) combined by
// VPERM, and the result is merged (VSEL) into the quadwords loaded from
// dst+0 and dst+15 before both are stored back.
// NOTE(review): this load/merge/store pattern presumably exists so that src
// and dst need not be 16-byte aligned — confirm against the LVX/STVX
// semantics in the Power ISA.
TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	// Load the arguments inside the registers
	MOVD dst+0(FP), BLK_OUT
	MOVD src+8(FP), BLK_INP
	MOVD enc+16(FP), BLK_KEY

	MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
	MOVD $15, BLK_IDX // li 7,15

	// In this function the key-setup vector names are reused as plain
	// scratch registers: ZERO (V0) holds the cipher state, IN0/IN1 hold
	// round keys, KEY and RCON hold permute controls for the output.
	LVX (BLK_INP)(R0), ZERO // lvx 0,0,3
	NEG BLK_OUT, R11 // neg 11,4
	LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3
	LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3
	VSPLTISB $0x0f, RCON // vspltisb 4,0x0f
	LVSR (R11)(R0), KEY // lvsr 3,0,11
	VXOR IN1, RCON, IN1 // vxor 2,2,4
	MOVD $16, BLK_IDX // li 7,16
	VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2
	LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5
	LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5
	SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
	LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
	VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5

	// XOR the state with the first round key, then run rounds/2 - 1 loop
	// iterations of two rounds each.
	VXOR ZERO, IN0, ZERO // vxor 0,0,1
	LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	MOVD BLK_ROUNDS, CTR // mtctr 6

loop_enc:
	VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
	VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
	VCIPHER ZERO, IN0, ZERO // vcipher 0,0,1
	LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	BC 0x10, 0, loop_enc // bdnz .Loop_enc

	// Final two rounds: one regular round and the last round.
	VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
	VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
	VCIPHERLAST ZERO, IN0, ZERO // vcipherlast 0,0,1

	// Write the result: rotate it with KEY, then merge it into the existing
	// quadwords at dst+0 and dst+15 under the select mask in IN1.
	VSPLTISB $-1, IN1 // vspltisb 2,-1
	VXOR IN0, IN0, IN0 // vxor 1,1,1
	MOVD $15, BLK_IDX // li 7,15
	VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3
	VXOR KEY, RCON, KEY // vxor 3,3,4
	LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4
	VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3
	VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2
	LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
	STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
	VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2
	STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4

	RET // blr

// func decryptBlockAsm(dst, src *byte, dec *uint32)
//
// Decrypts the 16 bytes at src with the expanded key at dec and writes the
// result to dst. The instruction sequence is the same as encryptBlockAsm
// with VCIPHER/VCIPHERLAST replaced by VNCIPHER/VNCIPHERLAST; the round keys
// are read in increasing address order, so dec must already be in decryption
// order (setDecryptKeyAsm reverses the schedule for this purpose).
TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	// Load the arguments inside the registers
	MOVD dst+0(FP), BLK_OUT
	MOVD src+8(FP), BLK_INP
	MOVD dec+16(FP), BLK_KEY

	MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
	MOVD $15, BLK_IDX // li 7,15

	// ZERO (V0) holds the cipher state; IN0/IN1 hold round keys.
	LVX (BLK_INP)(R0), ZERO // lvx 0,0,3
	NEG BLK_OUT, R11 // neg 11,4
	LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3
	LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3
	VSPLTISB $0x0f, RCON // vspltisb 4,0x0f
	LVSR (R11)(R0), KEY // lvsr 3,0,11
	VXOR IN1, RCON, IN1 // vxor 2,2,4
	MOVD $16, BLK_IDX // li 7,16
	VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2
	LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5
	LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5
	SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
	LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
	VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5

	// XOR the state with the first round key, then run rounds/2 - 1 loop
	// iterations of two rounds each.
	VXOR ZERO, IN0, ZERO // vxor 0,0,1
	LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	MOVD BLK_ROUNDS, CTR // mtctr 6

loop_dec:
	VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
	VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
	VNCIPHER ZERO, IN0, ZERO // vncipher 0,0,1
	LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	BC 0x10, 0, loop_dec // bdnz .Loop_dec

	// Final two rounds: one regular round and the last round.
	VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
	VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
	VNCIPHERLAST ZERO, IN0, ZERO // vncipherlast 0,0,1

	// Write the result: rotate it with KEY, then merge it into the existing
	// quadwords at dst+0 and dst+15 under the select mask in IN1.
	VSPLTISB $-1, IN1 // vspltisb 2,-1
	VXOR IN0, IN0, IN0 // vxor 1,1,1
	MOVD $15, BLK_IDX // li 7,15
	VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3
	VXOR KEY, RCON, KEY // vxor 3,3,4
	LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4
	VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3
	VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2
	LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
	STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
	VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2
	STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4

	RET // blr