#! /usr/bin/env perl
# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by David S. Miller and Andy Polyakov.
# The module is licensed under 2-clause BSD license. October 2012.
# All rights reserved.
# ====================================================================

######################################################################
# AES for SPARC T4.
#
# AES round instructions complete in 3 cycles and can be issued every
# cycle. It means that round calculations should take 4*rounds cycles,
# because any given round instruction depends on result of *both*
# previous instructions:
#
#	|0 |1 |2 |3 |4
#	|01|01|01|
#	   |23|23|23|
#	            |01|01|...
#	               |23|...
#
# Provided that fxor [with IV] takes 3 cycles to complete, critical
# path length for CBC encrypt would be 3+4*rounds, or in other words
# it should process one byte in at least (3+4*rounds)/16 cycles. This
# estimate doesn't account for "collateral" instructions, such as
# fetching input from memory, xor-ing it with zero-round key and
# storing the result. Yet, *measured* performance [for data aligned
# at 64-bit boundary!] deviates from this equation by less than 0.5%:
#
#		128-bit key	192-		256-
# CBC encrypt	2.70/2.90(*)	3.20/3.40	3.70/3.90
#			 (*) numbers after slash are for
#			     misaligned data.
#
# Out-of-order execution logic managed to fully overlap "collateral"
# instructions with those on critical path. Amazing!
#
# As with Intel AES-NI, question is if it's possible to improve
# performance of parallelizable modes by interleaving round
# instructions. Provided round instruction latency and throughput
# optimal interleave factor is 2. But can we expect 2x performance
# improvement? Well, as round instructions can be issued one per
# cycle, they don't saturate the 2-way issue pipeline and therefore
# there is room for "collateral" calculations... Yet, 2x speed-up
# over CBC encrypt remains unattaintable:
#
#		128-bit key	192-		256-
# CBC decrypt	1.64/2.11	1.89/2.37	2.23/2.61
# CTR		1.64/2.08(*)	1.89/2.33	2.23/2.61
#			 (*) numbers after slash are for
#			     misaligned data.
#
# Estimates based on amount of instructions under assumption that
# round instructions are not pairable with any other instruction
# suggest that latter is the actual case and pipeline runs
# underutilized. It should be noted that T4 out-of-order execution
# logic is so capable that performance gain from 2x interleave is
# not even impressive, ~7-13% over non-interleaved code, largest
# for 256-bit keys.

# To anchor to something else, software implementation processes
# one byte in 29 cycles with 128-bit key on same processor. Intel
# Sandy Bridge encrypts byte in 5.07 cycles in CBC mode and decrypts
# in 0.93, naturally with AES-NI.

# Locate our own directory so the shared perlasm helpers can be found
# relative to this script.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# Output assembly file is the last command-line argument; three-arg
# checked open so a bad path fails loudly instead of silently.
$output = pop;
open STDOUT, ">", $output or die "can't open $output: $!";

$::evp=1;	# if $evp is set to 0, script generates module with
# AES_[en|de]crypt, AES_set_[en|de]crypt_key and AES_cbc_encrypt entry
# points. These however are not fully compatible with openssl/aes.h,
# because they expect AES_KEY to be aligned at 64-bit boundary. When
# used through EVP, alignment is arranged at EVP layer. Second thing
# that is arranged by EVP is at least 32-bit alignment of IV.

######################################################################
# single-round subroutines
#
{
my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));

# Misalignment-tolerant one-block encrypt/decrypt: input is read with
# 64-bit loads and realigned by shifting; output uses stda partial
# stores when $out is not 8-byte aligned.
$code.=<<___;
#include "sparc_arch.h"

#ifdef	__arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

.text

.globl	aes_t4_encrypt
.align	32
aes_t4_encrypt:
	andcc		$inp, 7, %g1		! is input aligned?
	andn		$inp, 7, $inp

	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5

	ldx		[$inp + 0], %o4
	bz,pt		%icc, 1f
	ldx		[$inp + 8], %o5
	ldx		[$inp + 16], $inp
	sll		%g1, 3, %g1
	sub		%g0, %g1, %o3
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %o3, %o5
	srlx		$inp, %o3, %o3
	or		%o5, %o4, %o4
	or		%o3, %g1, %o5
1:
	ld		[$key + 240], $rounds
	ldd		[$key + 16], %f12
	ldd		[$key + 24], %f14
	xor		%g4, %o4, %o4
	xor		%g5, %o5, %o5
	movxtod		%o4, %f0
	movxtod		%o5, %f2
	srl		$rounds, 1, $rounds
	ldd		[$key + 32], %f16
	sub		$rounds, 1, $rounds
	ldd		[$key + 40], %f18
	add		$key, 48, $key

.Lenc:
	aes_eround01	%f12, %f0, %f2, %f4
	aes_eround23	%f14, %f0, %f2, %f2
	ldd		[$key + 0], %f12
	ldd		[$key + 8], %f14
	sub		$rounds,1,$rounds
	aes_eround01	%f16, %f4, %f2, %f0
	aes_eround23	%f18, %f4, %f2, %f2
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	brnz,pt		$rounds, .Lenc
	add		$key, 32, $key

	andcc		$out, 7, $tmp		! is output aligned?
	aes_eround01	%f12, %f0, %f2, %f4
	aes_eround23	%f14, %f0, %f2, %f2
	aes_eround01_l	%f16, %f4, %f2, %f0
	aes_eround23_l	%f18, %f4, %f2, %f2

	bnz,pn		%icc, 2f
	nop

	std		%f0, [$out + 0]
	retl
	std		%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov		0xff, $mask
	srl		$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda		%f4, [$out + $mask]0xc0	! partial store
	std		%f6, [$out + 8]
	add		$out, 16, $out
	orn		%g0, $mask, $mask
	retl
	stda		%f8, [$out + $mask]0xc0	! partial store
.type	aes_t4_encrypt,#function
.size	aes_t4_encrypt,.-aes_t4_encrypt

.globl	aes_t4_decrypt
.align	32
aes_t4_decrypt:
	andcc		$inp, 7, %g1		! is input aligned?
	andn		$inp, 7, $inp

	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5

	ldx		[$inp + 0], %o4
	bz,pt		%icc, 1f
	ldx		[$inp + 8], %o5
	ldx		[$inp + 16], $inp
	sll		%g1, 3, %g1
	sub		%g0, %g1, %o3
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %o3, %o5
	srlx		$inp, %o3, %o3
	or		%o5, %o4, %o4
	or		%o3, %g1, %o5
1:
	ld		[$key + 240], $rounds
	ldd		[$key + 16], %f12
	ldd		[$key + 24], %f14
	xor		%g4, %o4, %o4
	xor		%g5, %o5, %o5
	movxtod		%o4, %f0
	movxtod		%o5, %f2
	srl		$rounds, 1, $rounds
	ldd		[$key + 32], %f16
	sub		$rounds, 1, $rounds
	ldd		[$key + 40], %f18
	add		$key, 48, $key

.Ldec:
	aes_dround01	%f12, %f0, %f2, %f4
	aes_dround23	%f14, %f0, %f2, %f2
	ldd		[$key + 0], %f12
	ldd		[$key + 8], %f14
	sub		$rounds,1,$rounds
	aes_dround01	%f16, %f4, %f2, %f0
	aes_dround23	%f18, %f4, %f2, %f2
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	brnz,pt		$rounds, .Ldec
	add		$key, 32, $key

	andcc		$out, 7, $tmp		! is output aligned?
	aes_dround01	%f12, %f0, %f2, %f4
	aes_dround23	%f14, %f0, %f2, %f2
	aes_dround01_l	%f16, %f4, %f2, %f0
	aes_dround23_l	%f18, %f4, %f2, %f2

	bnz,pn		%icc, 2f
	nop

	std		%f0, [$out + 0]
	retl
	std		%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov		0xff, $mask
	srl		$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda		%f4, [$out + $mask]0xc0	! partial store
	std		%f6, [$out + 8]
	add		$out, 16, $out
	orn		%g0, $mask, $mask
	retl
	stda		%f8, [$out + $mask]0xc0	! partial store
.type	aes_t4_decrypt,#function
.size	aes_t4_decrypt,.-aes_t4_decrypt
___
}

######################################################################
# key setup subroutines
#
{
my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));
$code.=<<___;
.globl	aes_t4_set_encrypt_key
.align	32
aes_t4_set_encrypt_key:
.Lset_encrypt_key:
	and		$inp, 7, $tmp
	alignaddr	$inp, %g0, $inp
	cmp		$bits, 192
	ldd		[$inp + 0], %f0
	bl,pt		%icc,.L128
	ldd		[$inp + 8], %f2

	be,pt		%icc,.L192
	ldd		[$inp + 16], %f4
	brz,pt		$tmp, .L256aligned
	ldd		[$inp + 24], %f6

	ldd		[$inp + 32], %f8
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
	faligndata	%f6, %f8, %f6
.L256aligned:
___
# 256-bit schedule: 6 full kexpand groups, then a trailing partial group.
for ($i=0; $i<6; $i++) {
    $code.=<<___;
	std		%f0, [$out + `32*$i+0`]
	aes_kexpand1	%f0, %f6, $i, %f0
	std		%f2, [$out + `32*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
	std		%f4, [$out + `32*$i+16`]
	aes_kexpand0	%f4, %f2, %f4
	std		%f6, [$out + `32*$i+24`]
	aes_kexpand2	%f6, %f4, %f6
___
}
$code.=<<___;
	std		%f0, [$out + `32*$i+0`]
	aes_kexpand1	%f0, %f6, $i, %f0
	std		%f2, [$out + `32*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
	std		%f4, [$out + `32*$i+16`]
	std		%f6, [$out + `32*$i+24`]
	std		%f0, [$out + `32*$i+32`]
	std		%f2, [$out + `32*$i+40`]

	mov		14, $tmp
	st		$tmp, [$out + 240]
	retl
	xor		%o0, %o0, %o0

.align	16
.L192:
	brz,pt		$tmp, .L192aligned
	nop

	ldd		[$inp + 24], %f6
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
.L192aligned:
___
# 192-bit schedule: 7 full kexpand groups, then a trailing partial group.
for ($i=0; $i<7; $i++) {
    $code.=<<___;
	std		%f0, [$out + `24*$i+0`]
	aes_kexpand1	%f0, %f4, $i, %f0
	std		%f2, [$out + `24*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
	std		%f4, [$out + `24*$i+16`]
	aes_kexpand2	%f4, %f2, %f4
___
}
$code.=<<___;
	std		%f0, [$out + `24*$i+0`]
	aes_kexpand1	%f0, %f4, $i, %f0
	std		%f2, [$out + `24*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
	std		%f4, [$out + `24*$i+16`]
	std		%f0, [$out + `24*$i+24`]
	std		%f2, [$out + `24*$i+32`]

	mov		12, $tmp
	st		$tmp, [$out + 240]
	retl
	xor		%o0, %o0, %o0

.align	16
.L128:
	brz,pt		$tmp, .L128aligned
	nop

	ldd		[$inp + 16], %f4
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
.L128aligned:
___
# 128-bit schedule: 10 kexpand groups, then store of the final round key.
for ($i=0; $i<10; $i++) {
    $code.=<<___;
	std		%f0, [$out + `16*$i+0`]
	aes_kexpand1	%f0, %f2, $i, %f0
	std		%f2, [$out + `16*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
___
}
$code.=<<___;
	std		%f0, [$out + `16*$i+0`]
	std		%f2, [$out + `16*$i+8`]

	mov		10, $tmp
	st		$tmp, [$out + 240]
	retl
	xor		%o0, %o0, %o0
.type	aes_t4_set_encrypt_key,#function
.size	aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key

.globl	aes_t4_set_decrypt_key
.align	32
aes_t4_set_decrypt_key:
	mov		%o7, %o5
	call		.Lset_encrypt_key
	nop

	mov		%o5, %o7
	sll		$tmp, 4, $inp		! $tmp is number of rounds
	add		$tmp, 2, $tmp
	add		$out, $inp, $inp	! $inp=$out+16*rounds
	srl		$tmp, 2, $tmp		! $tmp=(rounds+2)/4

.Lkey_flip:
	ldd		[$out + 0],  %f0
	ldd		[$out + 8],  %f2
	ldd		[$out + 16], %f4
	ldd		[$out + 24], %f6
	ldd		[$inp + 0],  %f8
	ldd		[$inp + 8],  %f10
	ldd		[$inp - 16], %f12
	ldd		[$inp - 8],  %f14
	sub		$tmp, 1, $tmp
	std		%f0, [$inp + 0]
	std		%f2, [$inp + 8]
	std		%f4, [$inp - 16]
	std		%f6, [$inp - 8]
	std		%f8, [$out + 0]
	std		%f10, [$out + 8]
	std		%f12, [$out + 16]
	std		%f14, [$out + 24]
	add		$out, 32, $out
	brnz		$tmp, .Lkey_flip
	sub		$inp, 32, $inp

	retl
	xor		%o0, %o0, %o0
.type	aes_t4_set_decrypt_key,#function
.size	aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
___
}

{{{
my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));

$code.=<<___;
.align	32
_aes128_encrypt_1x:
___
for ($i=0; $i<4; $i++) {
    $code.=<<___;
	aes_eround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_eround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_eround01	%f48, %f0, %f2, %f4
	aes_eround23	%f50, %f0, %f2, %f2
	aes_eround01_l	%f52, %f4, %f2, %f0
	retl
	aes_eround23_l	%f54, %f4, %f2, %f2
.type	_aes128_encrypt_1x,#function
.size	_aes128_encrypt_1x,.-_aes128_encrypt_1x

.align	32
_aes128_encrypt_2x:
___
for ($i=0; $i<4; $i++) {
    $code.=<<___;
	aes_eround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_eround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_eround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_eround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_eround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_eround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_eround01	%f48, %f0, %f2, %f8
	aes_eround23	%f50, %f0, %f2, %f2
	aes_eround01	%f48, %f4, %f6, %f10
	aes_eround23	%f50, %f4, %f6, %f6
	aes_eround01_l	%f52, %f8, %f2, %f0
	aes_eround23_l	%f54, %f8, %f2, %f2
	aes_eround01_l	%f52, %f10, %f6, %f4
	retl
	aes_eround23_l	%f54, %f10, %f6, %f6
.type	_aes128_encrypt_2x,#function
.size	_aes128_encrypt_2x,.-_aes128_encrypt_2x

.align	32
_aes128_loadkey:
	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5
___
for ($i=2; $i<22;$i++) {			# load key schedule
    $code.=<<___;
	ldd		[$key + `8*$i`], %f`12+2*$i`
___
}
$code.=<<___;
	retl
	nop
.type	_aes128_loadkey,#function
.size	_aes128_loadkey,.-_aes128_loadkey
_aes128_load_enckey=_aes128_loadkey
_aes128_load_deckey=_aes128_loadkey

___

&alg_cbc_encrypt_implement("aes",128);
if ($::evp) {
    &alg_ctr32_implement("aes",128);
    &alg_xts_implement("aes",128,"en");
    &alg_xts_implement("aes",128,"de");
}
&alg_cbc_decrypt_implement("aes",128);

$code.=<<___;
.align	32
_aes128_decrypt_1x:
___
for ($i=0; $i<4; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_dround01	%f48, %f0, %f2, %f4
	aes_dround23	%f50, %f0, %f2, %f2
	aes_dround01_l	%f52, %f4, %f2, %f0
	retl
	aes_dround23_l	%f54, %f4, %f2, %f2
.type	_aes128_decrypt_1x,#function
.size	_aes128_decrypt_1x,.-_aes128_decrypt_1x

.align	32
_aes128_decrypt_2x:
___
for ($i=0; $i<4; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_dround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_dround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_dround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_dround01	%f48, %f0, %f2, %f8
	aes_dround23	%f50, %f0, %f2, %f2
	aes_dround01	%f48, %f4, %f6, %f10
	aes_dround23	%f50, %f4, %f6, %f6
	aes_dround01_l	%f52, %f8, %f2, %f0
	aes_dround23_l	%f54, %f8, %f2, %f2
	aes_dround01_l	%f52, %f10, %f6, %f4
	retl
	aes_dround23_l	%f54, %f10, %f6, %f6
.type	_aes128_decrypt_2x,#function
.size	_aes128_decrypt_2x,.-_aes128_decrypt_2x
___

$code.=<<___;
.align	32
_aes192_encrypt_1x:
___
for ($i=0; $i<5; $i++) {
    $code.=<<___;
	aes_eround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_eround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_eround01	%f56, %f0, %f2, %f4
	aes_eround23	%f58, %f0, %f2, %f2
	aes_eround01_l	%f60, %f4, %f2, %f0
	retl
	aes_eround23_l	%f62, %f4, %f2, %f2
.type	_aes192_encrypt_1x,#function
.size	_aes192_encrypt_1x,.-_aes192_encrypt_1x

.align	32
_aes192_encrypt_2x:
___
for ($i=0; $i<5; $i++) {
    $code.=<<___;
	aes_eround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_eround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_eround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_eround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_eround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_eround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_eround01	%f56, %f0, %f2, %f8
	aes_eround23	%f58, %f0, %f2, %f2
	aes_eround01	%f56, %f4, %f6, %f10
	aes_eround23	%f58, %f4, %f6, %f6
	aes_eround01_l	%f60, %f8, %f2, %f0
	aes_eround23_l	%f62, %f8, %f2, %f2
	aes_eround01_l	%f60, %f10, %f6, %f4
	retl
	aes_eround23_l	%f62, %f10, %f6, %f6
.type	_aes192_encrypt_2x,#function
.size	_aes192_encrypt_2x,.-_aes192_encrypt_2x

.align	32
_aes256_encrypt_1x:
	aes_eround01	%f16, %f0, %f2, %f4
	aes_eround23	%f18, %f0, %f2, %f2
	ldd		[$key + 208], %f16
ldd [$key + 216], %f18 610 aes_eround01 %f20, %f4, %f2, %f0 611 aes_eround23 %f22, %f4, %f2, %f2 612 ldd [$key + 224], %f20 613 ldd [$key + 232], %f22 614___ 615for ($i=1; $i<6; $i++) { 616 $code.=<<___; 617 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4 618 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 619 aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0 620 aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2 621___ 622} 623$code.=<<___; 624 aes_eround01 %f16, %f0, %f2, %f4 625 aes_eround23 %f18, %f0, %f2, %f2 626 ldd [$key + 16], %f16 627 ldd [$key + 24], %f18 628 aes_eround01_l %f20, %f4, %f2, %f0 629 aes_eround23_l %f22, %f4, %f2, %f2 630 ldd [$key + 32], %f20 631 retl 632 ldd [$key + 40], %f22 633.type _aes256_encrypt_1x,#function 634.size _aes256_encrypt_1x,.-_aes256_encrypt_1x 635 636.align 32 637_aes256_encrypt_2x: 638 aes_eround01 %f16, %f0, %f2, %f8 639 aes_eround23 %f18, %f0, %f2, %f2 640 aes_eround01 %f16, %f4, %f6, %f10 641 aes_eround23 %f18, %f4, %f6, %f6 642 ldd [$key + 208], %f16 643 ldd [$key + 216], %f18 644 aes_eround01 %f20, %f8, %f2, %f0 645 aes_eround23 %f22, %f8, %f2, %f2 646 aes_eround01 %f20, %f10, %f6, %f4 647 aes_eround23 %f22, %f10, %f6, %f6 648 ldd [$key + 224], %f20 649 ldd [$key + 232], %f22 650___ 651for ($i=1; $i<6; $i++) { 652 $code.=<<___; 653 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8 654 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 655 aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10 656 aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6 657 aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0 658 aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2 659 aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4 660 aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6 661___ 662} 663$code.=<<___; 664 aes_eround01 %f16, %f0, %f2, %f8 665 aes_eround23 %f18, %f0, %f2, %f2 666 aes_eround01 %f16, %f4, %f6, %f10 667 aes_eround23 %f18, %f4, %f6, %f6 668 ldd [$key + 16], %f16 669 ldd [$key + 24], %f18 670 aes_eround01_l %f20, %f8, %f2, %f0 671 aes_eround23_l %f22, %f8, %f2, %f2 672 aes_eround01_l %f20, %f10, %f6, %f4 673 
aes_eround23_l %f22, %f10, %f6, %f6 674 ldd [$key + 32], %f20 675 retl 676 ldd [$key + 40], %f22 677.type _aes256_encrypt_2x,#function 678.size _aes256_encrypt_2x,.-_aes256_encrypt_2x 679 680.align 32 681_aes192_loadkey: 682 ldx [$key + 0], %g4 683 ldx [$key + 8], %g5 684___ 685for ($i=2; $i<26;$i++) { # load key schedule 686 $code.=<<___; 687 ldd [$key + `8*$i`], %f`12+2*$i` 688___ 689} 690$code.=<<___; 691 retl 692 nop 693.type _aes192_loadkey,#function 694.size _aes192_loadkey,.-_aes192_loadkey 695_aes256_loadkey=_aes192_loadkey 696_aes192_load_enckey=_aes192_loadkey 697_aes192_load_deckey=_aes192_loadkey 698_aes256_load_enckey=_aes192_loadkey 699_aes256_load_deckey=_aes192_loadkey 700___ 701 702&alg_cbc_encrypt_implement("aes",256); 703&alg_cbc_encrypt_implement("aes",192); 704if ($::evp) { 705 &alg_ctr32_implement("aes",256); 706 &alg_xts_implement("aes",256,"en"); 707 &alg_xts_implement("aes",256,"de"); 708 &alg_ctr32_implement("aes",192); 709} 710&alg_cbc_decrypt_implement("aes",192); 711&alg_cbc_decrypt_implement("aes",256); 712 713$code.=<<___; 714.align 32 715_aes256_decrypt_1x: 716 aes_dround01 %f16, %f0, %f2, %f4 717 aes_dround23 %f18, %f0, %f2, %f2 718 ldd [$key + 208], %f16 719 ldd [$key + 216], %f18 720 aes_dround01 %f20, %f4, %f2, %f0 721 aes_dround23 %f22, %f4, %f2, %f2 722 ldd [$key + 224], %f20 723 ldd [$key + 232], %f22 724___ 725for ($i=1; $i<6; $i++) { 726 $code.=<<___; 727 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4 728 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 729 aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0 730 aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2 731___ 732} 733$code.=<<___; 734 aes_dround01 %f16, %f0, %f2, %f4 735 aes_dround23 %f18, %f0, %f2, %f2 736 ldd [$key + 16], %f16 737 ldd [$key + 24], %f18 738 aes_dround01_l %f20, %f4, %f2, %f0 739 aes_dround23_l %f22, %f4, %f2, %f2 740 ldd [$key + 32], %f20 741 retl 742 ldd [$key + 40], %f22 743.type _aes256_decrypt_1x,#function 744.size _aes256_decrypt_1x,.-_aes256_decrypt_1x 745 746.align 32 
_aes256_decrypt_2x:
	aes_dround01	%f16, %f0, %f2, %f8
	aes_dround23	%f18, %f0, %f2, %f2
	aes_dround01	%f16, %f4, %f6, %f10
	aes_dround23	%f18, %f4, %f6, %f6
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	aes_dround01	%f20, %f8, %f2, %f0
	aes_dround23	%f22, %f8, %f2, %f2
	aes_dround01	%f20, %f10, %f6, %f4
	aes_dround23	%f22, %f10, %f6, %f6
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
___
for ($i=1; $i<6; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_dround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_dround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_dround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_dround01	%f16, %f0, %f2, %f8
	aes_dround23	%f18, %f0, %f2, %f2
	aes_dround01	%f16, %f4, %f6, %f10
	aes_dround23	%f18, %f4, %f6, %f6
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	aes_dround01_l	%f20, %f8, %f2, %f0
	aes_dround23_l	%f22, %f8, %f2, %f2
	aes_dround01_l	%f20, %f10, %f6, %f4
	aes_dround23_l	%f22, %f10, %f6, %f6
	ldd		[$key + 32], %f20
	retl
	ldd		[$key + 40], %f22
.type	_aes256_decrypt_2x,#function
.size	_aes256_decrypt_2x,.-_aes256_decrypt_2x

.align	32
_aes192_decrypt_1x:
___
for ($i=0; $i<5; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_dround01	%f56, %f0, %f2, %f4
	aes_dround23	%f58, %f0, %f2, %f2
	aes_dround01_l	%f60, %f4, %f2, %f0
	retl
	aes_dround23_l	%f62, %f4, %f2, %f2
.type	_aes192_decrypt_1x,#function
.size	_aes192_decrypt_1x,.-_aes192_decrypt_1x

.align	32
_aes192_decrypt_2x:
___
for ($i=0; $i<5; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_dround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_dround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_dround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_dround01	%f56, %f0, %f2, %f8
	aes_dround23	%f58, %f0, %f2, %f2
	aes_dround01	%f56, %f4, %f6, %f10
	aes_dround23	%f58, %f4, %f6, %f6
	aes_dround01_l	%f60, %f8, %f2, %f0
	aes_dround23_l	%f62, %f8, %f2, %f2
	aes_dround01_l	%f60, %f10, %f6, %f4
	retl
	aes_dround23_l	%f62, %f10, %f6, %f6
.type	_aes192_decrypt_2x,#function
.size	_aes192_decrypt_2x,.-_aes192_decrypt_2x
___
}}}

if (!$::evp) {
$code.=<<___;
.global	AES_encrypt
AES_encrypt=aes_t4_encrypt
.global	AES_decrypt
AES_decrypt=aes_t4_decrypt
.global	AES_set_encrypt_key
.align	32
AES_set_encrypt_key:
	andcc		%o2, 7, %g0		! check alignment
	bnz,a,pn	%icc, 1f
	mov		-1, %o0
	brz,a,pn	%o0, 1f
	mov		-1, %o0
	brz,a,pn	%o2, 1f
	mov		-1, %o0
	andncc		%o1, 0x1c0, %g0
	bnz,a,pn	%icc, 1f
	mov		-2, %o0
	cmp		%o1, 128
	bl,a,pn		%icc, 1f
	mov		-2, %o0
	b		aes_t4_set_encrypt_key
	nop
1:	retl
	nop
.type	AES_set_encrypt_key,#function
.size	AES_set_encrypt_key,.-AES_set_encrypt_key

.global	AES_set_decrypt_key
.align	32
AES_set_decrypt_key:
	andcc		%o2, 7, %g0		! check alignment
check alignment 873 bnz,a,pn %icc, 1f 874 mov -1, %o0 875 brz,a,pn %o0, 1f 876 mov -1, %o0 877 brz,a,pn %o2, 1f 878 mov -1, %o0 879 andncc %o1, 0x1c0, %g0 880 bnz,a,pn %icc, 1f 881 mov -2, %o0 882 cmp %o1, 128 883 bl,a,pn %icc, 1f 884 mov -2, %o0 885 b aes_t4_set_decrypt_key 886 nop 8871: retl 888 nop 889.type AES_set_decrypt_key,#function 890.size AES_set_decrypt_key,.-AES_set_decrypt_key 891___ 892 893my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5)); 894 895$code.=<<___; 896.globl AES_cbc_encrypt 897.align 32 898AES_cbc_encrypt: 899 ld [$key + 240], %g1 900 nop 901 brz $enc, .Lcbc_decrypt 902 cmp %g1, 12 903 904 bl,pt %icc, aes128_t4_cbc_encrypt 905 nop 906 be,pn %icc, aes192_t4_cbc_encrypt 907 nop 908 ba aes256_t4_cbc_encrypt 909 nop 910 911.Lcbc_decrypt: 912 bl,pt %icc, aes128_t4_cbc_decrypt 913 nop 914 be,pn %icc, aes192_t4_cbc_decrypt 915 nop 916 ba aes256_t4_cbc_decrypt 917 nop 918.type AES_cbc_encrypt,#function 919.size AES_cbc_encrypt,.-AES_cbc_encrypt 920___ 921} 922$code.=<<___; 923.asciz "AES for SPARC T4, David S. Miller, Andy Polyakov" 924.align 4 925___ 926 927&emit_assembler(); 928 929close STDOUT or die "error closing STDOUT: $!"; 930