#! /usr/bin/env perl
# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# AES for s390x.

# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution and an issued instruction [in this case
# load value from memory is critical] has to complete before execution
# flow proceeds. S-boxes are compressed to 2KB[+256B].
#
# As for hardware acceleration support. It's basically a "teaser," as
# it can and should be improved in several ways. Most notably, support
# for CBC is not utilized, nor are multiple blocks ever processed.
# Then software key schedule can be postponed till hardware support
# detection... Performance improvement over assembler is reportedly
# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
# support is implemented.

# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.

# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
# issue z10 makes it impossible to eliminate the interlock condition:
# critical path is not long enough. Yet it spends ~24 cycles per byte
# processed with 128-bit key.
#
# Unlike the previous version, hardware support detection takes place
# only at the moment of key schedule setup, which is denoted in
# key->rounds. This is done because deferred key setup can't be made
# MT-safe, not for keys longer than 128 bits.
#
# Add AES_cbc_encrypt, which gives incredible performance improvement,
# it was measured to be ~6.6x. It's less than previously mentioned 8x,
# because software implementation was optimized.

# May 2010.
#
# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
# performance improvement over "generic" counter mode routine relying
# on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
# to the fact that exact throughput value depends on current stack
# frame alignment within 4KB page. In worst case you get ~75% of the
# maximum, but *on average* it would be as much as ~98%. Meaning that
# the worst case is unlikely, it's like hitting a ravine on a plateau.

# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". Latter implies that the code
# remains z/Architecture specific. On z990 it was measured to perform
# 2x better than code generated by gcc 4.3.

# December 2010.
81# 82# Add support for z196 "cipher message with counter" instruction. 83# Note however that it's disengaged, because it was measured to 84# perform ~12% worse than vanilla km-based code... 85 86# February 2011. 87# 88# Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes 89# instructions, which deliver ~70% improvement at 8KB block size over 90# vanilla km-based code, 37% - at most like 512-bytes block size. 91 92$flavour = shift; 93 94if ($flavour =~ /3[12]/) { 95 $SIZE_T=4; 96 $g=""; 97} else { 98 $SIZE_T=8; 99 $g="g"; 100} 101 102while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} 103open STDOUT,">$output"; 104 105$softonly=0; # allow hardware support 106 107$t0="%r0"; $mask="%r0"; 108$t1="%r1"; 109$t2="%r2"; $inp="%r2"; 110$t3="%r3"; $out="%r3"; $bits="%r3"; 111$key="%r4"; 112$i1="%r5"; 113$i2="%r6"; 114$i3="%r7"; 115$s0="%r8"; 116$s1="%r9"; 117$s2="%r10"; 118$s3="%r11"; 119$tbl="%r12"; 120$rounds="%r13"; 121$ra="%r14"; 122$sp="%r15"; 123 124$stdframe=16*$SIZE_T+4*8; 125 126sub _data_word() 127{ my $i; 128 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; } 129} 130 131$code=<<___; 132#include "s390x_arch.h" 133 134.text 135 136.type AES_Te,\@object 137.align 256 138AES_Te: 139___ 140&_data_word( 141 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 142 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, 143 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, 144 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 145 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, 146 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, 147 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 148 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, 149 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, 150 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 151 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, 152 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, 153 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 154 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, 155 
0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, 156 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 157 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, 158 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, 159 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 160 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, 161 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, 162 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 163 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, 164 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, 165 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 166 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, 167 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, 168 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 169 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, 170 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, 171 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 172 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, 173 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, 174 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 175 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, 176 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, 177 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 178 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, 179 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, 180 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 181 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, 182 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, 183 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 184 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, 185 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, 186 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 187 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, 188 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, 189 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 190 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, 191 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, 192 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 193 0xe0707090, 0x7c3e3e42, 
0x71b5b5c4, 0xcc6666aa, 194 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, 195 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 196 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, 197 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, 198 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 199 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, 200 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, 201 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 202 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, 203 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, 204 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); 205$code.=<<___; 206# Te4[256] 207.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 208.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 209.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 210.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 211.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc 212.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 213.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a 214.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 215.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 216.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 217.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b 218.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf 219.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 220.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 221.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 222.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 223.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 224.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 225.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 226.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb 227.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c 228.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 229.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 230.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 231.byte 0xba, 0x78, 0x25, 
0x2e, 0x1c, 0xa6, 0xb4, 0xc6 232.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a 233.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e 234.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e 235.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 236.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf 237.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 238.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 239# rcon[] 240.long 0x01000000, 0x02000000, 0x04000000, 0x08000000 241.long 0x10000000, 0x20000000, 0x40000000, 0x80000000 242.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 243.align 256 244.size AES_Te,.-AES_Te 245 246# void AES_encrypt(const unsigned char *inp, unsigned char *out, 247# const AES_KEY *key) { 248.globl AES_encrypt 249.type AES_encrypt,\@function 250AES_encrypt: 251___ 252$code.=<<___ if (!$softonly); 253 l %r0,240($key) 254 lhi %r1,16 255 clr %r0,%r1 256 jl .Lesoft 257 258 la %r1,0($key) 259 #la %r2,0($inp) 260 la %r4,0($out) 261 lghi %r3,16 # single block length 262 .long 0xb92e0042 # km %r4,%r2 263 brc 1,.-4 # can this happen? 
264 br %r14 265.align 64 266.Lesoft: 267___ 268$code.=<<___; 269 stm${g} %r3,$ra,3*$SIZE_T($sp) 270 271 llgf $s0,0($inp) 272 llgf $s1,4($inp) 273 llgf $s2,8($inp) 274 llgf $s3,12($inp) 275 276 larl $tbl,AES_Te 277 bras $ra,_s390x_AES_encrypt 278 279 l${g} $out,3*$SIZE_T($sp) 280 st $s0,0($out) 281 st $s1,4($out) 282 st $s2,8($out) 283 st $s3,12($out) 284 285 lm${g} %r6,$ra,6*$SIZE_T($sp) 286 br $ra 287.size AES_encrypt,.-AES_encrypt 288 289.type _s390x_AES_encrypt,\@function 290.align 16 291_s390x_AES_encrypt: 292 st${g} $ra,15*$SIZE_T($sp) 293 x $s0,0($key) 294 x $s1,4($key) 295 x $s2,8($key) 296 x $s3,12($key) 297 l $rounds,240($key) 298 llill $mask,`0xff<<3` 299 aghi $rounds,-1 300 j .Lenc_loop 301.align 16 302.Lenc_loop: 303 sllg $t1,$s0,`0+3` 304 srlg $t2,$s0,`8-3` 305 srlg $t3,$s0,`16-3` 306 srl $s0,`24-3` 307 nr $s0,$mask 308 ngr $t1,$mask 309 nr $t2,$mask 310 nr $t3,$mask 311 312 srlg $i1,$s1,`16-3` # i0 313 sllg $i2,$s1,`0+3` 314 srlg $i3,$s1,`8-3` 315 srl $s1,`24-3` 316 nr $i1,$mask 317 nr $s1,$mask 318 ngr $i2,$mask 319 nr $i3,$mask 320 321 l $s0,0($s0,$tbl) # Te0[s0>>24] 322 l $t1,1($t1,$tbl) # Te3[s0>>0] 323 l $t2,2($t2,$tbl) # Te2[s0>>8] 324 l $t3,3($t3,$tbl) # Te1[s0>>16] 325 326 x $s0,3($i1,$tbl) # Te1[s1>>16] 327 l $s1,0($s1,$tbl) # Te0[s1>>24] 328 x $t2,1($i2,$tbl) # Te3[s1>>0] 329 x $t3,2($i3,$tbl) # Te2[s1>>8] 330 331 srlg $i1,$s2,`8-3` # i0 332 srlg $i2,$s2,`16-3` # i1 333 nr $i1,$mask 334 nr $i2,$mask 335 sllg $i3,$s2,`0+3` 336 srl $s2,`24-3` 337 nr $s2,$mask 338 ngr $i3,$mask 339 340 xr $s1,$t1 341 srlg $ra,$s3,`8-3` # i1 342 sllg $t1,$s3,`0+3` # i0 343 nr $ra,$mask 344 la $key,16($key) 345 ngr $t1,$mask 346 347 x $s0,2($i1,$tbl) # Te2[s2>>8] 348 x $s1,3($i2,$tbl) # Te1[s2>>16] 349 l $s2,0($s2,$tbl) # Te0[s2>>24] 350 x $t3,1($i3,$tbl) # Te3[s2>>0] 351 352 srlg $i3,$s3,`16-3` # i2 353 xr $s2,$t2 354 srl $s3,`24-3` 355 nr $i3,$mask 356 nr $s3,$mask 357 358 x $s0,0($key) 359 x $s1,4($key) 360 x $s2,8($key) 361 x $t3,12($key) 362 363 x 
$s0,1($t1,$tbl) # Te3[s3>>0] 364 x $s1,2($ra,$tbl) # Te2[s3>>8] 365 x $s2,3($i3,$tbl) # Te1[s3>>16] 366 l $s3,0($s3,$tbl) # Te0[s3>>24] 367 xr $s3,$t3 368 369 brct $rounds,.Lenc_loop 370 .align 16 371 372 sllg $t1,$s0,`0+3` 373 srlg $t2,$s0,`8-3` 374 ngr $t1,$mask 375 srlg $t3,$s0,`16-3` 376 srl $s0,`24-3` 377 nr $s0,$mask 378 nr $t2,$mask 379 nr $t3,$mask 380 381 srlg $i1,$s1,`16-3` # i0 382 sllg $i2,$s1,`0+3` 383 ngr $i2,$mask 384 srlg $i3,$s1,`8-3` 385 srl $s1,`24-3` 386 nr $i1,$mask 387 nr $s1,$mask 388 nr $i3,$mask 389 390 llgc $s0,2($s0,$tbl) # Te4[s0>>24] 391 llgc $t1,2($t1,$tbl) # Te4[s0>>0] 392 sll $s0,24 393 llgc $t2,2($t2,$tbl) # Te4[s0>>8] 394 llgc $t3,2($t3,$tbl) # Te4[s0>>16] 395 sll $t2,8 396 sll $t3,16 397 398 llgc $i1,2($i1,$tbl) # Te4[s1>>16] 399 llgc $s1,2($s1,$tbl) # Te4[s1>>24] 400 llgc $i2,2($i2,$tbl) # Te4[s1>>0] 401 llgc $i3,2($i3,$tbl) # Te4[s1>>8] 402 sll $i1,16 403 sll $s1,24 404 sll $i3,8 405 or $s0,$i1 406 or $s1,$t1 407 or $t2,$i2 408 or $t3,$i3 409 410 srlg $i1,$s2,`8-3` # i0 411 srlg $i2,$s2,`16-3` # i1 412 nr $i1,$mask 413 nr $i2,$mask 414 sllg $i3,$s2,`0+3` 415 srl $s2,`24-3` 416 ngr $i3,$mask 417 nr $s2,$mask 418 419 sllg $t1,$s3,`0+3` # i0 420 srlg $ra,$s3,`8-3` # i1 421 ngr $t1,$mask 422 423 llgc $i1,2($i1,$tbl) # Te4[s2>>8] 424 llgc $i2,2($i2,$tbl) # Te4[s2>>16] 425 sll $i1,8 426 llgc $s2,2($s2,$tbl) # Te4[s2>>24] 427 llgc $i3,2($i3,$tbl) # Te4[s2>>0] 428 sll $i2,16 429 nr $ra,$mask 430 sll $s2,24 431 or $s0,$i1 432 or $s1,$i2 433 or $s2,$t2 434 or $t3,$i3 435 436 srlg $i3,$s3,`16-3` # i2 437 srl $s3,`24-3` 438 nr $i3,$mask 439 nr $s3,$mask 440 441 l $t0,16($key) 442 l $t2,20($key) 443 444 llgc $i1,2($t1,$tbl) # Te4[s3>>0] 445 llgc $i2,2($ra,$tbl) # Te4[s3>>8] 446 llgc $i3,2($i3,$tbl) # Te4[s3>>16] 447 llgc $s3,2($s3,$tbl) # Te4[s3>>24] 448 sll $i2,8 449 sll $i3,16 450 sll $s3,24 451 or $s0,$i1 452 or $s1,$i2 453 or $s2,$i3 454 or $s3,$t3 455 456 l${g} $ra,15*$SIZE_T($sp) 457 xr $s0,$t0 458 xr $s1,$t2 459 x $s2,24($key) 460 x 
$s3,28($key) 461 462 br $ra 463.size _s390x_AES_encrypt,.-_s390x_AES_encrypt 464___ 465 466$code.=<<___; 467.type AES_Td,\@object 468.align 256 469AES_Td: 470___ 471&_data_word( 472 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 473 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, 474 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, 475 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 476 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, 477 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, 478 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 479 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, 480 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, 481 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 482 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, 483 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, 484 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 485 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, 486 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, 487 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 488 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, 489 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, 490 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 491 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, 492 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, 493 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 494 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, 495 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, 496 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 497 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, 498 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, 499 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 500 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, 501 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, 502 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 503 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, 504 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, 505 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 506 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, 
507 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, 508 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 509 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, 510 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, 511 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 512 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, 513 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, 514 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 515 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, 516 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, 517 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 518 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, 519 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, 520 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 521 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, 522 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, 523 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 524 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 525 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, 526 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 527 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, 528 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, 529 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 530 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, 531 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, 532 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 533 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, 534 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, 535 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); 536$code.=<<___; 537# Td4[256] 538.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 539.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb 540.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 541.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb 542.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d 543.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e 544.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 545.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 546.byte 
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 547.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 548.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda 549.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 550.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a 551.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 552.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 553.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b 554.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea 555.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 556.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 557.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e 558.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 559.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b 560.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 561.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 562.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 563.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f 564.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d 565.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef 566.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 567.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 568.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 569.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d 570.size AES_Td,.-AES_Td 571 572# void AES_decrypt(const unsigned char *inp, unsigned char *out, 573# const AES_KEY *key) { 574.globl AES_decrypt 575.type AES_decrypt,\@function 576AES_decrypt: 577___ 578$code.=<<___ if (!$softonly); 579 l %r0,240($key) 580 lhi %r1,16 581 clr %r0,%r1 582 jl .Ldsoft 583 584 la %r1,0($key) 585 #la %r2,0($inp) 586 la %r4,0($out) 587 lghi %r3,16 # single block length 588 .long 0xb92e0042 # km %r4,%r2 589 brc 1,.-4 # can this happen? 
590 br %r14 591.align 64 592.Ldsoft: 593___ 594$code.=<<___; 595 stm${g} %r3,$ra,3*$SIZE_T($sp) 596 597 llgf $s0,0($inp) 598 llgf $s1,4($inp) 599 llgf $s2,8($inp) 600 llgf $s3,12($inp) 601 602 larl $tbl,AES_Td 603 bras $ra,_s390x_AES_decrypt 604 605 l${g} $out,3*$SIZE_T($sp) 606 st $s0,0($out) 607 st $s1,4($out) 608 st $s2,8($out) 609 st $s3,12($out) 610 611 lm${g} %r6,$ra,6*$SIZE_T($sp) 612 br $ra 613.size AES_decrypt,.-AES_decrypt 614 615.type _s390x_AES_decrypt,\@function 616.align 16 617_s390x_AES_decrypt: 618 st${g} $ra,15*$SIZE_T($sp) 619 x $s0,0($key) 620 x $s1,4($key) 621 x $s2,8($key) 622 x $s3,12($key) 623 l $rounds,240($key) 624 llill $mask,`0xff<<3` 625 aghi $rounds,-1 626 j .Ldec_loop 627.align 16 628.Ldec_loop: 629 srlg $t1,$s0,`16-3` 630 srlg $t2,$s0,`8-3` 631 sllg $t3,$s0,`0+3` 632 srl $s0,`24-3` 633 nr $s0,$mask 634 nr $t1,$mask 635 nr $t2,$mask 636 ngr $t3,$mask 637 638 sllg $i1,$s1,`0+3` # i0 639 srlg $i2,$s1,`16-3` 640 srlg $i3,$s1,`8-3` 641 srl $s1,`24-3` 642 ngr $i1,$mask 643 nr $s1,$mask 644 nr $i2,$mask 645 nr $i3,$mask 646 647 l $s0,0($s0,$tbl) # Td0[s0>>24] 648 l $t1,3($t1,$tbl) # Td1[s0>>16] 649 l $t2,2($t2,$tbl) # Td2[s0>>8] 650 l $t3,1($t3,$tbl) # Td3[s0>>0] 651 652 x $s0,1($i1,$tbl) # Td3[s1>>0] 653 l $s1,0($s1,$tbl) # Td0[s1>>24] 654 x $t2,3($i2,$tbl) # Td1[s1>>16] 655 x $t3,2($i3,$tbl) # Td2[s1>>8] 656 657 srlg $i1,$s2,`8-3` # i0 658 sllg $i2,$s2,`0+3` # i1 659 srlg $i3,$s2,`16-3` 660 srl $s2,`24-3` 661 nr $i1,$mask 662 ngr $i2,$mask 663 nr $s2,$mask 664 nr $i3,$mask 665 666 xr $s1,$t1 667 srlg $ra,$s3,`8-3` # i1 668 srlg $t1,$s3,`16-3` # i0 669 nr $ra,$mask 670 la $key,16($key) 671 nr $t1,$mask 672 673 x $s0,2($i1,$tbl) # Td2[s2>>8] 674 x $s1,1($i2,$tbl) # Td3[s2>>0] 675 l $s2,0($s2,$tbl) # Td0[s2>>24] 676 x $t3,3($i3,$tbl) # Td1[s2>>16] 677 678 sllg $i3,$s3,`0+3` # i2 679 srl $s3,`24-3` 680 ngr $i3,$mask 681 nr $s3,$mask 682 683 xr $s2,$t2 684 x $s0,0($key) 685 x $s1,4($key) 686 x $s2,8($key) 687 x $t3,12($key) 688 689 x 
$s0,3($t1,$tbl) # Td1[s3>>16] 690 x $s1,2($ra,$tbl) # Td2[s3>>8] 691 x $s2,1($i3,$tbl) # Td3[s3>>0] 692 l $s3,0($s3,$tbl) # Td0[s3>>24] 693 xr $s3,$t3 694 695 brct $rounds,.Ldec_loop 696 .align 16 697 698 l $t1,`2048+0`($tbl) # prefetch Td4 699 l $t2,`2048+64`($tbl) 700 l $t3,`2048+128`($tbl) 701 l $i1,`2048+192`($tbl) 702 llill $mask,0xff 703 704 srlg $i3,$s0,24 # i0 705 srlg $t1,$s0,16 706 srlg $t2,$s0,8 707 nr $s0,$mask # i3 708 nr $t1,$mask 709 710 srlg $i1,$s1,24 711 nr $t2,$mask 712 srlg $i2,$s1,16 713 srlg $ra,$s1,8 714 nr $s1,$mask # i0 715 nr $i2,$mask 716 nr $ra,$mask 717 718 llgc $i3,2048($i3,$tbl) # Td4[s0>>24] 719 llgc $t1,2048($t1,$tbl) # Td4[s0>>16] 720 llgc $t2,2048($t2,$tbl) # Td4[s0>>8] 721 sll $t1,16 722 llgc $t3,2048($s0,$tbl) # Td4[s0>>0] 723 sllg $s0,$i3,24 724 sll $t2,8 725 726 llgc $s1,2048($s1,$tbl) # Td4[s1>>0] 727 llgc $i1,2048($i1,$tbl) # Td4[s1>>24] 728 llgc $i2,2048($i2,$tbl) # Td4[s1>>16] 729 sll $i1,24 730 llgc $i3,2048($ra,$tbl) # Td4[s1>>8] 731 sll $i2,16 732 sll $i3,8 733 or $s0,$s1 734 or $t1,$i1 735 or $t2,$i2 736 or $t3,$i3 737 738 srlg $i1,$s2,8 # i0 739 srlg $i2,$s2,24 740 srlg $i3,$s2,16 741 nr $s2,$mask # i1 742 nr $i1,$mask 743 nr $i3,$mask 744 llgc $i1,2048($i1,$tbl) # Td4[s2>>8] 745 llgc $s1,2048($s2,$tbl) # Td4[s2>>0] 746 llgc $i2,2048($i2,$tbl) # Td4[s2>>24] 747 llgc $i3,2048($i3,$tbl) # Td4[s2>>16] 748 sll $i1,8 749 sll $i2,24 750 or $s0,$i1 751 sll $i3,16 752 or $t2,$i2 753 or $t3,$i3 754 755 srlg $i1,$s3,16 # i0 756 srlg $i2,$s3,8 # i1 757 srlg $i3,$s3,24 758 nr $s3,$mask # i2 759 nr $i1,$mask 760 nr $i2,$mask 761 762 l${g} $ra,15*$SIZE_T($sp) 763 or $s1,$t1 764 l $t0,16($key) 765 l $t1,20($key) 766 767 llgc $i1,2048($i1,$tbl) # Td4[s3>>16] 768 llgc $i2,2048($i2,$tbl) # Td4[s3>>8] 769 sll $i1,16 770 llgc $s2,2048($s3,$tbl) # Td4[s3>>0] 771 llgc $s3,2048($i3,$tbl) # Td4[s3>>24] 772 sll $i2,8 773 sll $s3,24 774 or $s0,$i1 775 or $s1,$i2 776 or $s2,$t2 777 or $s3,$t3 778 779 xr $s0,$t0 780 xr $s1,$t1 781 x $s2,24($key) 
782 x $s3,28($key) 783 784 br $ra 785.size _s390x_AES_decrypt,.-_s390x_AES_decrypt 786___ 787 788$code.=<<___; 789# void AES_set_encrypt_key(const unsigned char *in, int bits, 790# AES_KEY *key) { 791.globl AES_set_encrypt_key 792.type AES_set_encrypt_key,\@function 793.align 16 794AES_set_encrypt_key: 795_s390x_AES_set_encrypt_key: 796 lghi $t0,0 797 cl${g}r $inp,$t0 798 je .Lminus1 799 cl${g}r $key,$t0 800 je .Lminus1 801 802 lghi $t0,128 803 clr $bits,$t0 804 je .Lproceed 805 lghi $t0,192 806 clr $bits,$t0 807 je .Lproceed 808 lghi $t0,256 809 clr $bits,$t0 810 je .Lproceed 811 lghi %r2,-2 812 br %r14 813 814.align 16 815.Lproceed: 816___ 817$code.=<<___ if (!$softonly); 818 # convert bits to km(c) code, [128,192,256]->[18,19,20] 819 lhi %r5,-128 820 lhi %r0,18 821 ar %r5,$bits 822 srl %r5,6 823 ar %r5,%r0 824 825 larl %r1,OPENSSL_s390xcap_P 826 llihh %r0,0x8000 827 srlg %r0,%r0,0(%r5) 828 ng %r0,S390X_KM(%r1) # check availability of both km... 829 ng %r0,S390X_KMC(%r1) # ...and kmc support for given key length 830 jz .Lekey_internal 831 832 lmg %r0,%r1,0($inp) # just copy 128 bits... 
833 stmg %r0,%r1,0($key) 834 lhi %r0,192 835 cr $bits,%r0 836 jl 1f 837 lg %r1,16($inp) 838 stg %r1,16($key) 839 je 1f 840 lg %r1,24($inp) 841 stg %r1,24($key) 8421: st $bits,236($key) # save bits [for debugging purposes] 843 lgr $t0,%r5 844 st %r5,240($key) # save km(c) code 845 lghi %r2,0 846 br %r14 847___ 848$code.=<<___; 849.align 16 850.Lekey_internal: 851 stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key 852 853 larl $tbl,AES_Te+2048 854 855 llgf $s0,0($inp) 856 llgf $s1,4($inp) 857 llgf $s2,8($inp) 858 llgf $s3,12($inp) 859 st $s0,0($key) 860 st $s1,4($key) 861 st $s2,8($key) 862 st $s3,12($key) 863 lghi $t0,128 864 cr $bits,$t0 865 jne .Lnot128 866 867 llill $mask,0xff 868 lghi $t3,0 # i=0 869 lghi $rounds,10 870 st $rounds,240($key) 871 872 llgfr $t2,$s3 # temp=rk[3] 873 srlg $i1,$s3,8 874 srlg $i2,$s3,16 875 srlg $i3,$s3,24 876 nr $t2,$mask 877 nr $i1,$mask 878 nr $i2,$mask 879 880.align 16 881.L128_loop: 882 la $t2,0($t2,$tbl) 883 la $i1,0($i1,$tbl) 884 la $i2,0($i2,$tbl) 885 la $i3,0($i3,$tbl) 886 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8 887 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16 888 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24 889 icm $t2,1,0($i3) # Te4[rk[3]>>24] 890 x $t2,256($t3,$tbl) # rcon[i] 891 xr $s0,$t2 # rk[4]=rk[0]^... 
892 xr $s1,$s0 # rk[5]=rk[1]^rk[4] 893 xr $s2,$s1 # rk[6]=rk[2]^rk[5] 894 xr $s3,$s2 # rk[7]=rk[3]^rk[6] 895 896 llgfr $t2,$s3 # temp=rk[3] 897 srlg $i1,$s3,8 898 srlg $i2,$s3,16 899 nr $t2,$mask 900 nr $i1,$mask 901 srlg $i3,$s3,24 902 nr $i2,$mask 903 904 st $s0,16($key) 905 st $s1,20($key) 906 st $s2,24($key) 907 st $s3,28($key) 908 la $key,16($key) # key+=4 909 la $t3,4($t3) # i++ 910 brct $rounds,.L128_loop 911 lghi $t0,10 912 lghi %r2,0 913 lm${g} %r4,%r13,4*$SIZE_T($sp) 914 br $ra 915 916.align 16 917.Lnot128: 918 llgf $t0,16($inp) 919 llgf $t1,20($inp) 920 st $t0,16($key) 921 st $t1,20($key) 922 lghi $t0,192 923 cr $bits,$t0 924 jne .Lnot192 925 926 llill $mask,0xff 927 lghi $t3,0 # i=0 928 lghi $rounds,12 929 st $rounds,240($key) 930 lghi $rounds,8 931 932 srlg $i1,$t1,8 933 srlg $i2,$t1,16 934 srlg $i3,$t1,24 935 nr $t1,$mask 936 nr $i1,$mask 937 nr $i2,$mask 938 939.align 16 940.L192_loop: 941 la $t1,0($t1,$tbl) 942 la $i1,0($i1,$tbl) 943 la $i2,0($i2,$tbl) 944 la $i3,0($i3,$tbl) 945 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8 946 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16 947 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24 948 icm $t1,1,0($i3) # Te4[rk[5]>>24] 949 x $t1,256($t3,$tbl) # rcon[i] 950 xr $s0,$t1 # rk[6]=rk[0]^... 
951 xr $s1,$s0 # rk[7]=rk[1]^rk[6] 952 xr $s2,$s1 # rk[8]=rk[2]^rk[7] 953 xr $s3,$s2 # rk[9]=rk[3]^rk[8] 954 955 st $s0,24($key) 956 st $s1,28($key) 957 st $s2,32($key) 958 st $s3,36($key) 959 brct $rounds,.L192_continue 960 lghi $t0,12 961 lghi %r2,0 962 lm${g} %r4,%r13,4*$SIZE_T($sp) 963 br $ra 964 965.align 16 966.L192_continue: 967 lgr $t1,$s3 968 x $t1,16($key) # rk[10]=rk[4]^rk[9] 969 st $t1,40($key) 970 x $t1,20($key) # rk[11]=rk[5]^rk[10] 971 st $t1,44($key) 972 973 srlg $i1,$t1,8 974 srlg $i2,$t1,16 975 srlg $i3,$t1,24 976 nr $t1,$mask 977 nr $i1,$mask 978 nr $i2,$mask 979 980 la $key,24($key) # key+=6 981 la $t3,4($t3) # i++ 982 j .L192_loop 983 984.align 16 985.Lnot192: 986 llgf $t0,24($inp) 987 llgf $t1,28($inp) 988 st $t0,24($key) 989 st $t1,28($key) 990 llill $mask,0xff 991 lghi $t3,0 # i=0 992 lghi $rounds,14 993 st $rounds,240($key) 994 lghi $rounds,7 995 996 srlg $i1,$t1,8 997 srlg $i2,$t1,16 998 srlg $i3,$t1,24 999 nr $t1,$mask 1000 nr $i1,$mask 1001 nr $i2,$mask 1002 1003.align 16 1004.L256_loop: 1005 la $t1,0($t1,$tbl) 1006 la $i1,0($i1,$tbl) 1007 la $i2,0($i2,$tbl) 1008 la $i3,0($i3,$tbl) 1009 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8 1010 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16 1011 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24 1012 icm $t1,1,0($i3) # Te4[rk[7]>>24] 1013 x $t1,256($t3,$tbl) # rcon[i] 1014 xr $s0,$t1 # rk[8]=rk[0]^... 
1015 xr $s1,$s0 # rk[9]=rk[1]^rk[8] 1016 xr $s2,$s1 # rk[10]=rk[2]^rk[9] 1017 xr $s3,$s2 # rk[11]=rk[3]^rk[10] 1018 st $s0,32($key) 1019 st $s1,36($key) 1020 st $s2,40($key) 1021 st $s3,44($key) 1022 brct $rounds,.L256_continue 1023 lghi $t0,14 1024 lghi %r2,0 1025 lm${g} %r4,%r13,4*$SIZE_T($sp) 1026 br $ra 1027 1028.align 16 1029.L256_continue: 1030 lgr $t1,$s3 # temp=rk[11] 1031 srlg $i1,$s3,8 1032 srlg $i2,$s3,16 1033 srlg $i3,$s3,24 1034 nr $t1,$mask 1035 nr $i1,$mask 1036 nr $i2,$mask 1037 la $t1,0($t1,$tbl) 1038 la $i1,0($i1,$tbl) 1039 la $i2,0($i2,$tbl) 1040 la $i3,0($i3,$tbl) 1041 llgc $t1,0($t1) # Te4[rk[11]>>0] 1042 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8 1043 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16 1044 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24 1045 x $t1,16($key) # rk[12]=rk[4]^... 1046 st $t1,48($key) 1047 x $t1,20($key) # rk[13]=rk[5]^rk[12] 1048 st $t1,52($key) 1049 x $t1,24($key) # rk[14]=rk[6]^rk[13] 1050 st $t1,56($key) 1051 x $t1,28($key) # rk[15]=rk[7]^rk[14] 1052 st $t1,60($key) 1053 1054 srlg $i1,$t1,8 1055 srlg $i2,$t1,16 1056 srlg $i3,$t1,24 1057 nr $t1,$mask 1058 nr $i1,$mask 1059 nr $i2,$mask 1060 1061 la $key,32($key) # key+=8 1062 la $t3,4($t3) # i++ 1063 j .L256_loop 1064 1065.Lminus1: 1066 lghi %r2,-1 1067 br $ra 1068.size AES_set_encrypt_key,.-AES_set_encrypt_key 1069 1070# void AES_set_decrypt_key(const unsigned char *in, int bits, 1071# AES_KEY *key) { 1072.globl AES_set_decrypt_key 1073.type AES_set_decrypt_key,\@function 1074.align 16 1075AES_set_decrypt_key: 1076 #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to 1077 st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key! 
1078 bras $ra,_s390x_AES_set_encrypt_key 1079 #l${g} $key,4*$SIZE_T($sp) 1080 l${g} $ra,14*$SIZE_T($sp) 1081 ltgr %r2,%r2 1082 bnzr $ra 1083___ 1084$code.=<<___ if (!$softonly); 1085 #l $t0,240($key) 1086 lhi $t1,16 1087 cr $t0,$t1 1088 jl .Lgo 1089 oill $t0,S390X_DECRYPT # set "decrypt" bit 1090 st $t0,240($key) 1091 br $ra 1092___ 1093$code.=<<___; 1094.align 16 1095.Lgo: lgr $rounds,$t0 #llgf $rounds,240($key) 1096 la $i1,0($key) 1097 sllg $i2,$rounds,4 1098 la $i2,0($i2,$key) 1099 srl $rounds,1 1100 lghi $t1,-16 1101 1102.align 16 1103.Linv: lmg $s0,$s1,0($i1) 1104 lmg $s2,$s3,0($i2) 1105 stmg $s0,$s1,0($i2) 1106 stmg $s2,$s3,0($i1) 1107 la $i1,16($i1) 1108 la $i2,0($t1,$i2) 1109 brct $rounds,.Linv 1110___ 1111$mask80=$i1; 1112$mask1b=$i2; 1113$maskfe=$i3; 1114$code.=<<___; 1115 llgf $rounds,240($key) 1116 aghi $rounds,-1 1117 sll $rounds,2 # (rounds-1)*4 1118 llilh $mask80,0x8080 1119 llilh $mask1b,0x1b1b 1120 llilh $maskfe,0xfefe 1121 oill $mask80,0x8080 1122 oill $mask1b,0x1b1b 1123 oill $maskfe,0xfefe 1124 1125.align 16 1126.Lmix: l $s0,16($key) # tp1 1127 lr $s1,$s0 1128 ngr $s1,$mask80 1129 srlg $t1,$s1,7 1130 slr $s1,$t1 1131 nr $s1,$mask1b 1132 sllg $t1,$s0,1 1133 nr $t1,$maskfe 1134 xr $s1,$t1 # tp2 1135 1136 lr $s2,$s1 1137 ngr $s2,$mask80 1138 srlg $t1,$s2,7 1139 slr $s2,$t1 1140 nr $s2,$mask1b 1141 sllg $t1,$s1,1 1142 nr $t1,$maskfe 1143 xr $s2,$t1 # tp4 1144 1145 lr $s3,$s2 1146 ngr $s3,$mask80 1147 srlg $t1,$s3,7 1148 slr $s3,$t1 1149 nr $s3,$mask1b 1150 sllg $t1,$s2,1 1151 nr $t1,$maskfe 1152 xr $s3,$t1 # tp8 1153 1154 xr $s1,$s0 # tp2^tp1 1155 xr $s2,$s0 # tp4^tp1 1156 rll $s0,$s0,24 # = ROTATE(tp1,8) 1157 xr $s2,$s3 # ^=tp8 1158 xr $s0,$s1 # ^=tp2^tp1 1159 xr $s1,$s3 # tp2^tp1^tp8 1160 xr $s0,$s2 # ^=tp4^tp1^tp8 1161 rll $s1,$s1,8 1162 rll $s2,$s2,16 1163 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24) 1164 rll $s3,$s3,24 1165 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16) 1166 xr $s0,$s3 # ^= ROTATE(tp8,8) 1167 1168 st $s0,16($key) 1169 la $key,4($key) 1170 
brct $rounds,.Lmix 1171 1172 lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key! 1173 lghi %r2,0 1174 br $ra 1175.size AES_set_decrypt_key,.-AES_set_decrypt_key 1176___ 1177 1178######################################################################## 1179# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, 1180# size_t length, const AES_KEY *key, 1181# unsigned char *ivec, const int enc) 1182{ 1183my $inp="%r2"; 1184my $out="%r4"; # length and out are swapped 1185my $len="%r3"; 1186my $key="%r5"; 1187my $ivp="%r6"; 1188 1189$code.=<<___; 1190.globl AES_cbc_encrypt 1191.type AES_cbc_encrypt,\@function 1192.align 16 1193AES_cbc_encrypt: 1194 xgr %r3,%r4 # flip %r3 and %r4, out and len 1195 xgr %r4,%r3 1196 xgr %r3,%r4 1197___ 1198$code.=<<___ if (!$softonly); 1199 lhi %r0,16 1200 cl %r0,240($key) 1201 jh .Lcbc_software 1202 1203 lg %r0,0($ivp) # copy ivec 1204 lg %r1,8($ivp) 1205 stmg %r0,%r1,16($sp) 1206 lmg %r0,%r1,0($key) # copy key, cover 256 bit 1207 stmg %r0,%r1,32($sp) 1208 lmg %r0,%r1,16($key) 1209 stmg %r0,%r1,48($sp) 1210 l %r0,240($key) # load kmc code 1211 lghi $key,15 # res=len%16, len-=res; 1212 ngr $key,$len 1213 sl${g}r $len,$key 1214 la %r1,16($sp) # parameter block - ivec || key 1215 jz .Lkmc_truncated 1216 .long 0xb92f0042 # kmc %r4,%r2 1217 brc 1,.-4 # pay attention to "partial completion" 1218 ltr $key,$key 1219 jnz .Lkmc_truncated 1220.Lkmc_done: 1221 lmg %r0,%r1,16($sp) # copy ivec to caller 1222 stg %r0,0($ivp) 1223 stg %r1,8($ivp) 1224 br $ra 1225.align 16 1226.Lkmc_truncated: 1227 ahi $key,-1 # it's the way it's encoded in mvc 1228 tmll %r0,S390X_DECRYPT 1229 jnz .Lkmc_truncated_dec 1230 lghi %r1,0 1231 stg %r1,16*$SIZE_T($sp) 1232 stg %r1,16*$SIZE_T+8($sp) 1233 bras %r1,1f 1234 mvc 16*$SIZE_T(1,$sp),0($inp) 12351: ex $key,0(%r1) 1236 la %r1,16($sp) # restore parameter block 1237 la $inp,16*$SIZE_T($sp) 1238 lghi $len,16 1239 .long 0xb92f0042 # kmc %r4,%r2 1240 j .Lkmc_done 1241.align 16 
1242.Lkmc_truncated_dec: 1243 st${g} $out,4*$SIZE_T($sp) 1244 la $out,16*$SIZE_T($sp) 1245 lghi $len,16 1246 .long 0xb92f0042 # kmc %r4,%r2 1247 l${g} $out,4*$SIZE_T($sp) 1248 bras %r1,2f 1249 mvc 0(1,$out),16*$SIZE_T($sp) 12502: ex $key,0(%r1) 1251 j .Lkmc_done 1252.align 16 1253.Lcbc_software: 1254___ 1255$code.=<<___; 1256 stm${g} $key,$ra,5*$SIZE_T($sp) 1257 lhi %r0,0 1258 cl %r0,`$stdframe+$SIZE_T-4`($sp) 1259 je .Lcbc_decrypt 1260 1261 larl $tbl,AES_Te 1262 1263 llgf $s0,0($ivp) 1264 llgf $s1,4($ivp) 1265 llgf $s2,8($ivp) 1266 llgf $s3,12($ivp) 1267 1268 lghi $t0,16 1269 sl${g}r $len,$t0 1270 brc 4,.Lcbc_enc_tail # if borrow 1271.Lcbc_enc_loop: 1272 stm${g} $inp,$out,2*$SIZE_T($sp) 1273 x $s0,0($inp) 1274 x $s1,4($inp) 1275 x $s2,8($inp) 1276 x $s3,12($inp) 1277 lgr %r4,$key 1278 1279 bras $ra,_s390x_AES_encrypt 1280 1281 lm${g} $inp,$key,2*$SIZE_T($sp) 1282 st $s0,0($out) 1283 st $s1,4($out) 1284 st $s2,8($out) 1285 st $s3,12($out) 1286 1287 la $inp,16($inp) 1288 la $out,16($out) 1289 lghi $t0,16 1290 lt${g}r $len,$len 1291 jz .Lcbc_enc_done 1292 sl${g}r $len,$t0 1293 brc 4,.Lcbc_enc_tail # if borrow 1294 j .Lcbc_enc_loop 1295.align 16 1296.Lcbc_enc_done: 1297 l${g} $ivp,6*$SIZE_T($sp) 1298 st $s0,0($ivp) 1299 st $s1,4($ivp) 1300 st $s2,8($ivp) 1301 st $s3,12($ivp) 1302 1303 lm${g} %r7,$ra,7*$SIZE_T($sp) 1304 br $ra 1305 1306.align 16 1307.Lcbc_enc_tail: 1308 aghi $len,15 1309 lghi $t0,0 1310 stg $t0,16*$SIZE_T($sp) 1311 stg $t0,16*$SIZE_T+8($sp) 1312 bras $t1,3f 1313 mvc 16*$SIZE_T(1,$sp),0($inp) 13143: ex $len,0($t1) 1315 lghi $len,0 1316 la $inp,16*$SIZE_T($sp) 1317 j .Lcbc_enc_loop 1318 1319.align 16 1320.Lcbc_decrypt: 1321 larl $tbl,AES_Td 1322 1323 lg $t0,0($ivp) 1324 lg $t1,8($ivp) 1325 stmg $t0,$t1,16*$SIZE_T($sp) 1326 1327.Lcbc_dec_loop: 1328 stm${g} $inp,$out,2*$SIZE_T($sp) 1329 llgf $s0,0($inp) 1330 llgf $s1,4($inp) 1331 llgf $s2,8($inp) 1332 llgf $s3,12($inp) 1333 lgr %r4,$key 1334 1335 bras $ra,_s390x_AES_decrypt 1336 1337 lm${g} 
$inp,$key,2*$SIZE_T($sp) 1338 sllg $s0,$s0,32 1339 sllg $s2,$s2,32 1340 lr $s0,$s1 1341 lr $s2,$s3 1342 1343 lg $t0,0($inp) 1344 lg $t1,8($inp) 1345 xg $s0,16*$SIZE_T($sp) 1346 xg $s2,16*$SIZE_T+8($sp) 1347 lghi $s1,16 1348 sl${g}r $len,$s1 1349 brc 4,.Lcbc_dec_tail # if borrow 1350 brc 2,.Lcbc_dec_done # if zero 1351 stg $s0,0($out) 1352 stg $s2,8($out) 1353 stmg $t0,$t1,16*$SIZE_T($sp) 1354 1355 la $inp,16($inp) 1356 la $out,16($out) 1357 j .Lcbc_dec_loop 1358 1359.Lcbc_dec_done: 1360 stg $s0,0($out) 1361 stg $s2,8($out) 1362.Lcbc_dec_exit: 1363 lm${g} %r6,$ra,6*$SIZE_T($sp) 1364 stmg $t0,$t1,0($ivp) 1365 1366 br $ra 1367 1368.align 16 1369.Lcbc_dec_tail: 1370 aghi $len,15 1371 stg $s0,16*$SIZE_T($sp) 1372 stg $s2,16*$SIZE_T+8($sp) 1373 bras $s1,4f 1374 mvc 0(1,$out),16*$SIZE_T($sp) 13754: ex $len,0($s1) 1376 j .Lcbc_dec_exit 1377.size AES_cbc_encrypt,.-AES_cbc_encrypt 1378___ 1379} 1380######################################################################## 1381# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, 1382# size_t blocks, const AES_KEY *key, 1383# const unsigned char *ivec) 1384{ 1385my $inp="%r2"; 1386my $out="%r4"; # blocks and out are swapped 1387my $len="%r3"; 1388my $key="%r5"; my $iv0="%r5"; 1389my $ivp="%r6"; 1390my $fp ="%r7"; 1391 1392$code.=<<___; 1393.globl AES_ctr32_encrypt 1394.type AES_ctr32_encrypt,\@function 1395.align 16 1396AES_ctr32_encrypt: 1397 xgr %r3,%r4 # flip %r3 and %r4, $out and $len 1398 xgr %r4,%r3 1399 xgr %r3,%r4 1400 llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case 1401___ 1402$code.=<<___ if (!$softonly); 1403 l %r0,240($key) 1404 lhi %r1,16 1405 clr %r0,%r1 1406 jl .Lctr32_software 1407 1408 st${g} $s2,10*$SIZE_T($sp) 1409 st${g} $s3,11*$SIZE_T($sp) 1410 1411 clr $len,%r1 # does work even in 64-bit mode 1412 jle .Lctr32_nokma # kma is slower for <= 16 blocks 1413 1414 larl %r1,OPENSSL_s390xcap_P 1415 lr $s2,%r0 1416 llihh $s3,0x8000 1417 srlg $s3,$s3,0($s2) 1418 ng $s3,S390X_KMA(%r1) # 
check kma capability vector 1419 jz .Lctr32_nokma 1420 1421 l${g}hi %r1,-$stdframe-112 1422 l${g}r $s3,$sp 1423 la $sp,0(%r1,$sp) # prepare parameter block 1424 1425 lhi %r1,0x0600 1426 sllg $len,$len,4 1427 or %r0,%r1 # set HS and LAAD flags 1428 1429 st${g} $s3,0($sp) # backchain 1430 la %r1,$stdframe($sp) 1431 1432 lmg $s2,$s3,0($key) # copy key 1433 stg $s2,$stdframe+80($sp) 1434 stg $s3,$stdframe+88($sp) 1435 lmg $s2,$s3,16($key) 1436 stg $s2,$stdframe+96($sp) 1437 stg $s3,$stdframe+104($sp) 1438 1439 lmg $s2,$s3,0($ivp) # copy iv 1440 stg $s2,$stdframe+64($sp) 1441 ahi $s3,-1 # kma requires counter-1 1442 stg $s3,$stdframe+72($sp) 1443 st $s3,$stdframe+12($sp) # copy counter 1444 1445 lghi $s2,0 # no AAD 1446 lghi $s3,0 1447 1448 .long 0xb929a042 # kma $out,$s2,$inp 1449 brc 1,.-4 # pay attention to "partial completion" 1450 1451 stg %r0,$stdframe+80($sp) # wipe key 1452 stg %r0,$stdframe+88($sp) 1453 stg %r0,$stdframe+96($sp) 1454 stg %r0,$stdframe+104($sp) 1455 la $sp,$stdframe+112($sp) 1456 1457 lm${g} $s2,$s3,10*$SIZE_T($sp) 1458 br $ra 1459 1460.align 16 1461.Lctr32_nokma: 1462 stm${g} %r6,$s1,6*$SIZE_T($sp) 1463 1464 slgr $out,$inp 1465 la %r1,0($key) # %r1 is permanent copy of $key 1466 lg $iv0,0($ivp) # load ivec 1467 lg $ivp,8($ivp) 1468 1469 # prepare and allocate stack frame at the top of 4K page 1470 # with 1K reserved for eventual signal handling 1471 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer 1472 lghi $s1,-4096 1473 algr $s0,$sp 1474 lgr $fp,$sp 1475 ngr $s0,$s1 # align at page boundary 1476 slgr $fp,$s0 # total buffer size 1477 lgr $s2,$sp 1478 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility 1479 slgr $fp,$s1 # deduct reservation to get usable buffer size 1480 # buffer size is at lest 256 and at most 3072+256-16 1481 1482 la $sp,1024($s0) # alloca 1483 srlg $fp,$fp,4 # convert bytes to blocks, minimum 16 1484 st${g} $s2,0($sp) # back-chain 1485 st${g} $fp,$SIZE_T($sp) 1486 1487 slgr $len,$fp 1488 brc 
1,.Lctr32_hw_switch # not zero, no borrow 1489 algr $fp,$len # input is shorter than allocated buffer 1490 lghi $len,0 1491 st${g} $fp,$SIZE_T($sp) 1492 1493.Lctr32_hw_switch: 1494___ 1495$code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower 1496 llgfr $s0,%r0 1497 lgr $s1,%r1 1498 larl %r1,OPENSSL_s390xcap_P 1499 llihh %r0,0x8000 # check if kmctr supports the function code 1500 srlg %r0,%r0,0($s0) 1501 ng %r0,S390X_KMCTR(%r1) # check kmctr capability vector 1502 lgr %r0,$s0 1503 lgr %r1,$s1 1504 jz .Lctr32_km_loop 1505 1506####### kmctr code 1507 algr $out,$inp # restore $out 1508 lgr $s1,$len # $s1 undertakes $len 1509 j .Lctr32_kmctr_loop 1510.align 16 1511.Lctr32_kmctr_loop: 1512 la $s2,16($sp) 1513 lgr $s3,$fp 1514.Lctr32_kmctr_prepare: 1515 stg $iv0,0($s2) 1516 stg $ivp,8($s2) 1517 la $s2,16($s2) 1518 ahi $ivp,1 # 32-bit increment, preserves upper half 1519 brct $s3,.Lctr32_kmctr_prepare 1520 1521 #la $inp,0($inp) # inp 1522 sllg $len,$fp,4 # len 1523 #la $out,0($out) # out 1524 la $s2,16($sp) # iv 1525 .long 0xb92da042 # kmctr $out,$s2,$inp 1526 brc 1,.-4 # pay attention to "partial completion" 1527 1528 slgr $s1,$fp 1529 brc 1,.Lctr32_kmctr_loop # not zero, no borrow 1530 algr $fp,$s1 1531 lghi $s1,0 1532 brc 4+1,.Lctr32_kmctr_loop # not zero 1533 1534 l${g} $sp,0($sp) 1535 lm${g} %r6,$s3,6*$SIZE_T($sp) 1536 br $ra 1537.align 16 1538___ 1539$code.=<<___ if (!$softonly); 1540.Lctr32_km_loop: 1541 la $s2,16($sp) 1542 lgr $s3,$fp 1543.Lctr32_km_prepare: 1544 stg $iv0,0($s2) 1545 stg $ivp,8($s2) 1546 la $s2,16($s2) 1547 ahi $ivp,1 # 32-bit increment, preserves upper half 1548 brct $s3,.Lctr32_km_prepare 1549 1550 la $s0,16($sp) # inp 1551 sllg $s1,$fp,4 # len 1552 la $s2,16($sp) # out 1553 .long 0xb92e00a8 # km %r10,%r8 1554 brc 1,.-4 # pay attention to "partial completion" 1555 1556 la $s2,16($sp) 1557 lgr $s3,$fp 1558 slgr $s2,$inp 1559.Lctr32_km_xor: 1560 lg $s0,0($inp) 1561 lg $s1,8($inp) 1562 xg $s0,0($s2,$inp) 1563 xg 
$s1,8($s2,$inp) 1564 stg $s0,0($out,$inp) 1565 stg $s1,8($out,$inp) 1566 la $inp,16($inp) 1567 brct $s3,.Lctr32_km_xor 1568 1569 slgr $len,$fp 1570 brc 1,.Lctr32_km_loop # not zero, no borrow 1571 algr $fp,$len 1572 lghi $len,0 1573 brc 4+1,.Lctr32_km_loop # not zero 1574 1575 l${g} $s0,0($sp) 1576 l${g} $s1,$SIZE_T($sp) 1577 la $s2,16($sp) 1578.Lctr32_km_zap: 1579 stg $s0,0($s2) 1580 stg $s0,8($s2) 1581 la $s2,16($s2) 1582 brct $s1,.Lctr32_km_zap 1583 1584 la $sp,0($s0) 1585 lm${g} %r6,$s3,6*$SIZE_T($sp) 1586 br $ra 1587.align 16 1588.Lctr32_software: 1589___ 1590$code.=<<___; 1591 stm${g} $key,$ra,5*$SIZE_T($sp) 1592 sl${g}r $inp,$out 1593 larl $tbl,AES_Te 1594 llgf $t1,12($ivp) 1595 1596.Lctr32_loop: 1597 stm${g} $inp,$out,2*$SIZE_T($sp) 1598 llgf $s0,0($ivp) 1599 llgf $s1,4($ivp) 1600 llgf $s2,8($ivp) 1601 lgr $s3,$t1 1602 st $t1,16*$SIZE_T($sp) 1603 lgr %r4,$key 1604 1605 bras $ra,_s390x_AES_encrypt 1606 1607 lm${g} $inp,$ivp,2*$SIZE_T($sp) 1608 llgf $t1,16*$SIZE_T($sp) 1609 x $s0,0($inp,$out) 1610 x $s1,4($inp,$out) 1611 x $s2,8($inp,$out) 1612 x $s3,12($inp,$out) 1613 stm $s0,$s3,0($out) 1614 1615 la $out,16($out) 1616 ahi $t1,1 # 32-bit increment 1617 brct $len,.Lctr32_loop 1618 1619 lm${g} %r6,$ra,6*$SIZE_T($sp) 1620 br $ra 1621.size AES_ctr32_encrypt,.-AES_ctr32_encrypt 1622___ 1623} 1624 1625######################################################################## 1626# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out, 1627# size_t len, const AES_KEY *key1, const AES_KEY *key2, 1628# const unsigned char iv[16]); 1629# 1630{ 1631my $inp="%r2"; 1632my $out="%r4"; # len and out are swapped 1633my $len="%r3"; 1634my $key1="%r5"; # $i1 1635my $key2="%r6"; # $i2 1636my $fp="%r7"; # $i3 1637my $tweak=16*$SIZE_T+16; # or $stdframe-16, bottom of the frame... 
1638 1639$code.=<<___; 1640.type _s390x_xts_km,\@function 1641.align 16 1642_s390x_xts_km: 1643___ 1644$code.=<<___ if(1); 1645 llgfr $s0,%r0 # put aside the function code 1646 lghi $s1,0x7f 1647 nr $s1,%r0 1648 larl %r1,OPENSSL_s390xcap_P 1649 llihh %r0,0x8000 1650 srlg %r0,%r0,32($s1) # check for 32+function code 1651 ng %r0,S390X_KM(%r1) # check km capability vector 1652 lgr %r0,$s0 # restore the function code 1653 la %r1,0($key1) # restore $key1 1654 jz .Lxts_km_vanilla 1655 1656 lmg $i2,$i3,$tweak($sp) # put aside the tweak value 1657 algr $out,$inp 1658 1659 oill %r0,32 # switch to xts function code 1660 aghi $s1,-18 # 1661 sllg $s1,$s1,3 # (function code - 18)*8, 0 or 16 1662 la %r1,$tweak-16($sp) 1663 slgr %r1,$s1 # parameter block position 1664 lmg $s0,$s3,0($key1) # load 256 bits of key material, 1665 stmg $s0,$s3,0(%r1) # and copy it to parameter block. 1666 # yes, it contains junk and overlaps 1667 # with the tweak in 128-bit case. 1668 # it's done to avoid conditional 1669 # branch. 
1670 stmg $i2,$i3,$tweak($sp) # "re-seat" the tweak value 1671 1672 .long 0xb92e0042 # km %r4,%r2 1673 brc 1,.-4 # pay attention to "partial completion" 1674 1675 lrvg $s0,$tweak+0($sp) # load the last tweak 1676 lrvg $s1,$tweak+8($sp) 1677 stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key 1678 1679 nill %r0,0xffdf # switch back to original function code 1680 la %r1,0($key1) # restore pointer to $key1 1681 slgr $out,$inp 1682 1683 llgc $len,2*$SIZE_T-1($sp) 1684 nill $len,0x0f # $len%=16 1685 br $ra 1686 1687.align 16 1688.Lxts_km_vanilla: 1689___ 1690$code.=<<___; 1691 # prepare and allocate stack frame at the top of 4K page 1692 # with 1K reserved for eventual signal handling 1693 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer 1694 lghi $s1,-4096 1695 algr $s0,$sp 1696 lgr $fp,$sp 1697 ngr $s0,$s1 # align at page boundary 1698 slgr $fp,$s0 # total buffer size 1699 lgr $s2,$sp 1700 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility 1701 slgr $fp,$s1 # deduct reservation to get usable buffer size 1702 # buffer size is at lest 256 and at most 3072+256-16 1703 1704 la $sp,1024($s0) # alloca 1705 nill $fp,0xfff0 # round to 16*n 1706 st${g} $s2,0($sp) # back-chain 1707 nill $len,0xfff0 # redundant 1708 st${g} $fp,$SIZE_T($sp) 1709 1710 slgr $len,$fp 1711 brc 1,.Lxts_km_go # not zero, no borrow 1712 algr $fp,$len # input is shorter than allocated buffer 1713 lghi $len,0 1714 st${g} $fp,$SIZE_T($sp) 1715 1716.Lxts_km_go: 1717 lrvg $s0,$tweak+0($s2) # load the tweak value in little-endian 1718 lrvg $s1,$tweak+8($s2) 1719 1720 la $s2,16($sp) # vector of ascending tweak values 1721 slgr $s2,$inp 1722 srlg $s3,$fp,4 1723 j .Lxts_km_start 1724 1725.Lxts_km_loop: 1726 la $s2,16($sp) 1727 slgr $s2,$inp 1728 srlg $s3,$fp,4 1729.Lxts_km_prepare: 1730 lghi $i1,0x87 1731 srag $i2,$s1,63 # broadcast upper bit 1732 ngr $i1,$i2 # rem 1733 algr $s0,$s0 1734 alcgr $s1,$s1 1735 xgr $s0,$i1 1736.Lxts_km_start: 1737 lrvgr $i1,$s0 # flip byte order 1738 lrvgr $i2,$s1 
1739 stg $i1,0($s2,$inp) 1740 stg $i2,8($s2,$inp) 1741 xg $i1,0($inp) 1742 xg $i2,8($inp) 1743 stg $i1,0($out,$inp) 1744 stg $i2,8($out,$inp) 1745 la $inp,16($inp) 1746 brct $s3,.Lxts_km_prepare 1747 1748 slgr $inp,$fp # rewind $inp 1749 la $s2,0($out,$inp) 1750 lgr $s3,$fp 1751 .long 0xb92e00aa # km $s2,$s2 1752 brc 1,.-4 # pay attention to "partial completion" 1753 1754 la $s2,16($sp) 1755 slgr $s2,$inp 1756 srlg $s3,$fp,4 1757.Lxts_km_xor: 1758 lg $i1,0($out,$inp) 1759 lg $i2,8($out,$inp) 1760 xg $i1,0($s2,$inp) 1761 xg $i2,8($s2,$inp) 1762 stg $i1,0($out,$inp) 1763 stg $i2,8($out,$inp) 1764 la $inp,16($inp) 1765 brct $s3,.Lxts_km_xor 1766 1767 slgr $len,$fp 1768 brc 1,.Lxts_km_loop # not zero, no borrow 1769 algr $fp,$len 1770 lghi $len,0 1771 brc 4+1,.Lxts_km_loop # not zero 1772 1773 l${g} $i1,0($sp) # back-chain 1774 llgf $fp,`2*$SIZE_T-4`($sp) # bytes used 1775 la $i2,16($sp) 1776 srlg $fp,$fp,4 1777.Lxts_km_zap: 1778 stg $i1,0($i2) 1779 stg $i1,8($i2) 1780 la $i2,16($i2) 1781 brct $fp,.Lxts_km_zap 1782 1783 la $sp,0($i1) 1784 llgc $len,2*$SIZE_T-1($i1) 1785 nill $len,0x0f # $len%=16 1786 bzr $ra 1787 1788 # generate one more tweak... 1789 lghi $i1,0x87 1790 srag $i2,$s1,63 # broadcast upper bit 1791 ngr $i1,$i2 # rem 1792 algr $s0,$s0 1793 alcgr $s1,$s1 1794 xgr $s0,$i1 1795 1796 ltr $len,$len # clear zero flag 1797 br $ra 1798.size _s390x_xts_km,.-_s390x_xts_km 1799 1800.globl AES_xts_encrypt 1801.type AES_xts_encrypt,\@function 1802.align 16 1803AES_xts_encrypt: 1804 xgr %r3,%r4 # flip %r3 and %r4, $out and $len 1805 xgr %r4,%r3 1806 xgr %r3,%r4 1807___ 1808$code.=<<___ if ($SIZE_T==4); 1809 llgfr $len,$len 1810___ 1811$code.=<<___; 1812 st${g} $len,1*$SIZE_T($sp) # save copy of $len 1813 srag $len,$len,4 # formally wrong, because it expands 1814 # sign byte, but who can afford asking 1815 # to process more than 2^63-1 bytes? 1816 # I use it, because it sets condition 1817 # code... 1818 bcr 8,$ra # abort if zero (i.e. 
less than 16) 1819___ 1820$code.=<<___ if (!$softonly); 1821 llgf %r0,240($key2) 1822 lhi %r1,16 1823 clr %r0,%r1 1824 jl .Lxts_enc_software 1825 1826 st${g} $ra,5*$SIZE_T($sp) 1827 stm${g} %r6,$s3,6*$SIZE_T($sp) 1828 1829 sllg $len,$len,4 # $len&=~15 1830 slgr $out,$inp 1831 1832 # generate the tweak value 1833 l${g} $s3,$stdframe($sp) # pointer to iv 1834 la $s2,$tweak($sp) 1835 lmg $s0,$s1,0($s3) 1836 lghi $s3,16 1837 stmg $s0,$s1,0($s2) 1838 la %r1,0($key2) # $key2 is not needed anymore 1839 .long 0xb92e00aa # km $s2,$s2, generate the tweak 1840 brc 1,.-4 # can this happen? 1841 1842 l %r0,240($key1) 1843 la %r1,0($key1) # $key1 is not needed anymore 1844 bras $ra,_s390x_xts_km 1845 jz .Lxts_enc_km_done 1846 1847 aghi $inp,-16 # take one step back 1848 la $i3,0($out,$inp) # put aside real $out 1849.Lxts_enc_km_steal: 1850 llgc $i1,16($inp) 1851 llgc $i2,0($out,$inp) 1852 stc $i1,0($out,$inp) 1853 stc $i2,16($out,$inp) 1854 la $inp,1($inp) 1855 brct $len,.Lxts_enc_km_steal 1856 1857 la $s2,0($i3) 1858 lghi $s3,16 1859 lrvgr $i1,$s0 # flip byte order 1860 lrvgr $i2,$s1 1861 xg $i1,0($s2) 1862 xg $i2,8($s2) 1863 stg $i1,0($s2) 1864 stg $i2,8($s2) 1865 .long 0xb92e00aa # km $s2,$s2 1866 brc 1,.-4 # can this happen? 
1867 lrvgr $i1,$s0 # flip byte order 1868 lrvgr $i2,$s1 1869 xg $i1,0($i3) 1870 xg $i2,8($i3) 1871 stg $i1,0($i3) 1872 stg $i2,8($i3) 1873 1874.Lxts_enc_km_done: 1875 stg $sp,$tweak+0($sp) # wipe tweak 1876 stg $sp,$tweak+8($sp) 1877 l${g} $ra,5*$SIZE_T($sp) 1878 lm${g} %r6,$s3,6*$SIZE_T($sp) 1879 br $ra 1880.align 16 1881.Lxts_enc_software: 1882___ 1883$code.=<<___; 1884 stm${g} %r6,$ra,6*$SIZE_T($sp) 1885 1886 slgr $out,$inp 1887 1888 l${g} $s3,$stdframe($sp) # ivp 1889 llgf $s0,0($s3) # load iv 1890 llgf $s1,4($s3) 1891 llgf $s2,8($s3) 1892 llgf $s3,12($s3) 1893 stm${g} %r2,%r5,2*$SIZE_T($sp) 1894 la $key,0($key2) 1895 larl $tbl,AES_Te 1896 bras $ra,_s390x_AES_encrypt # generate the tweak 1897 lm${g} %r2,%r5,2*$SIZE_T($sp) 1898 stm $s0,$s3,$tweak($sp) # save the tweak 1899 j .Lxts_enc_enter 1900 1901.align 16 1902.Lxts_enc_loop: 1903 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 1904 lrvg $s3,$tweak+8($sp) 1905 lghi %r1,0x87 1906 srag %r0,$s3,63 # broadcast upper bit 1907 ngr %r1,%r0 # rem 1908 algr $s1,$s1 1909 alcgr $s3,$s3 1910 xgr $s1,%r1 1911 lrvgr $s1,$s1 # flip byte order 1912 lrvgr $s3,$s3 1913 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 1914 stg $s1,$tweak+0($sp) # save the tweak 1915 llgfr $s1,$s1 1916 srlg $s2,$s3,32 1917 stg $s3,$tweak+8($sp) 1918 llgfr $s3,$s3 1919 la $inp,16($inp) # $inp+=16 1920.Lxts_enc_enter: 1921 x $s0,0($inp) # ^=*($inp) 1922 x $s1,4($inp) 1923 x $s2,8($inp) 1924 x $s3,12($inp) 1925 stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing 1926 la $key,0($key1) 1927 bras $ra,_s390x_AES_encrypt 1928 lm${g} %r2,%r5,2*$SIZE_T($sp) 1929 x $s0,$tweak+0($sp) # ^=tweak 1930 x $s1,$tweak+4($sp) 1931 x $s2,$tweak+8($sp) 1932 x $s3,$tweak+12($sp) 1933 st $s0,0($out,$inp) 1934 st $s1,4($out,$inp) 1935 st $s2,8($out,$inp) 1936 st $s3,12($out,$inp) 1937 brct${g} $len,.Lxts_enc_loop 1938 1939 llgc $len,`2*$SIZE_T-1`($sp) 1940 nill $len,0x0f # $len%16 1941 jz .Lxts_enc_done 1942 1943 la $i3,0($inp,$out) # put aside real 
$out 1944.Lxts_enc_steal: 1945 llgc %r0,16($inp) 1946 llgc %r1,0($out,$inp) 1947 stc %r0,0($out,$inp) 1948 stc %r1,16($out,$inp) 1949 la $inp,1($inp) 1950 brct $len,.Lxts_enc_steal 1951 la $out,0($i3) # restore real $out 1952 1953 # generate last tweak... 1954 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 1955 lrvg $s3,$tweak+8($sp) 1956 lghi %r1,0x87 1957 srag %r0,$s3,63 # broadcast upper bit 1958 ngr %r1,%r0 # rem 1959 algr $s1,$s1 1960 alcgr $s3,$s3 1961 xgr $s1,%r1 1962 lrvgr $s1,$s1 # flip byte order 1963 lrvgr $s3,$s3 1964 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 1965 stg $s1,$tweak+0($sp) # save the tweak 1966 llgfr $s1,$s1 1967 srlg $s2,$s3,32 1968 stg $s3,$tweak+8($sp) 1969 llgfr $s3,$s3 1970 1971 x $s0,0($out) # ^=*(inp)|stolen cipther-text 1972 x $s1,4($out) 1973 x $s2,8($out) 1974 x $s3,12($out) 1975 st${g} $out,4*$SIZE_T($sp) 1976 la $key,0($key1) 1977 bras $ra,_s390x_AES_encrypt 1978 l${g} $out,4*$SIZE_T($sp) 1979 x $s0,`$tweak+0`($sp) # ^=tweak 1980 x $s1,`$tweak+4`($sp) 1981 x $s2,`$tweak+8`($sp) 1982 x $s3,`$tweak+12`($sp) 1983 st $s0,0($out) 1984 st $s1,4($out) 1985 st $s2,8($out) 1986 st $s3,12($out) 1987 1988.Lxts_enc_done: 1989 stg $sp,$tweak+0($sp) # wipe tweak 1990 stg $sp,$tweak+8($sp) 1991 lm${g} %r6,$ra,6*$SIZE_T($sp) 1992 br $ra 1993.size AES_xts_encrypt,.-AES_xts_encrypt 1994___ 1995# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out, 1996# size_t len, const AES_KEY *key1, const AES_KEY *key2, 1997# const unsigned char iv[16]); 1998# 1999$code.=<<___; 2000.globl AES_xts_decrypt 2001.type AES_xts_decrypt,\@function 2002.align 16 2003AES_xts_decrypt: 2004 xgr %r3,%r4 # flip %r3 and %r4, $out and $len 2005 xgr %r4,%r3 2006 xgr %r3,%r4 2007___ 2008$code.=<<___ if ($SIZE_T==4); 2009 llgfr $len,$len 2010___ 2011$code.=<<___; 2012 st${g} $len,1*$SIZE_T($sp) # save copy of $len 2013 aghi $len,-16 2014 bcr 4,$ra # abort if less than zero. 
formally 2015 # wrong, because $len is unsigned, 2016 # but who can afford asking to 2017 # process more than 2^63-1 bytes? 2018 tmll $len,0x0f 2019 jnz .Lxts_dec_proceed 2020 aghi $len,16 2021.Lxts_dec_proceed: 2022___ 2023$code.=<<___ if (!$softonly); 2024 llgf %r0,240($key2) 2025 lhi %r1,16 2026 clr %r0,%r1 2027 jl .Lxts_dec_software 2028 2029 st${g} $ra,5*$SIZE_T($sp) 2030 stm${g} %r6,$s3,6*$SIZE_T($sp) 2031 2032 nill $len,0xfff0 # $len&=~15 2033 slgr $out,$inp 2034 2035 # generate the tweak value 2036 l${g} $s3,$stdframe($sp) # pointer to iv 2037 la $s2,$tweak($sp) 2038 lmg $s0,$s1,0($s3) 2039 lghi $s3,16 2040 stmg $s0,$s1,0($s2) 2041 la %r1,0($key2) # $key2 is not needed past this point 2042 .long 0xb92e00aa # km $s2,$s2, generate the tweak 2043 brc 1,.-4 # can this happen? 2044 2045 l %r0,240($key1) 2046 la %r1,0($key1) # $key1 is not needed anymore 2047 2048 ltgr $len,$len 2049 jz .Lxts_dec_km_short 2050 bras $ra,_s390x_xts_km 2051 jz .Lxts_dec_km_done 2052 2053 lrvgr $s2,$s0 # make copy in reverse byte order 2054 lrvgr $s3,$s1 2055 j .Lxts_dec_km_2ndtweak 2056 2057.Lxts_dec_km_short: 2058 llgc $len,`2*$SIZE_T-1`($sp) 2059 nill $len,0x0f # $len%=16 2060 lrvg $s0,$tweak+0($sp) # load the tweak 2061 lrvg $s1,$tweak+8($sp) 2062 lrvgr $s2,$s0 # make copy in reverse byte order 2063 lrvgr $s3,$s1 2064 2065.Lxts_dec_km_2ndtweak: 2066 lghi $i1,0x87 2067 srag $i2,$s1,63 # broadcast upper bit 2068 ngr $i1,$i2 # rem 2069 algr $s0,$s0 2070 alcgr $s1,$s1 2071 xgr $s0,$i1 2072 lrvgr $i1,$s0 # flip byte order 2073 lrvgr $i2,$s1 2074 2075 xg $i1,0($inp) 2076 xg $i2,8($inp) 2077 stg $i1,0($out,$inp) 2078 stg $i2,8($out,$inp) 2079 la $i2,0($out,$inp) 2080 lghi $i3,16 2081 .long 0xb92e0066 # km $i2,$i2 2082 brc 1,.-4 # can this happen? 
2083 lrvgr $i1,$s0 2084 lrvgr $i2,$s1 2085 xg $i1,0($out,$inp) 2086 xg $i2,8($out,$inp) 2087 stg $i1,0($out,$inp) 2088 stg $i2,8($out,$inp) 2089 2090 la $i3,0($out,$inp) # put aside real $out 2091.Lxts_dec_km_steal: 2092 llgc $i1,16($inp) 2093 llgc $i2,0($out,$inp) 2094 stc $i1,0($out,$inp) 2095 stc $i2,16($out,$inp) 2096 la $inp,1($inp) 2097 brct $len,.Lxts_dec_km_steal 2098 2099 lgr $s0,$s2 2100 lgr $s1,$s3 2101 xg $s0,0($i3) 2102 xg $s1,8($i3) 2103 stg $s0,0($i3) 2104 stg $s1,8($i3) 2105 la $s0,0($i3) 2106 lghi $s1,16 2107 .long 0xb92e0088 # km $s0,$s0 2108 brc 1,.-4 # can this happen? 2109 xg $s2,0($i3) 2110 xg $s3,8($i3) 2111 stg $s2,0($i3) 2112 stg $s3,8($i3) 2113.Lxts_dec_km_done: 2114 stg $sp,$tweak+0($sp) # wipe tweak 2115 stg $sp,$tweak+8($sp) 2116 l${g} $ra,5*$SIZE_T($sp) 2117 lm${g} %r6,$s3,6*$SIZE_T($sp) 2118 br $ra 2119.align 16 2120.Lxts_dec_software: 2121___ 2122$code.=<<___; 2123 stm${g} %r6,$ra,6*$SIZE_T($sp) 2124 2125 srlg $len,$len,4 2126 slgr $out,$inp 2127 2128 l${g} $s3,$stdframe($sp) # ivp 2129 llgf $s0,0($s3) # load iv 2130 llgf $s1,4($s3) 2131 llgf $s2,8($s3) 2132 llgf $s3,12($s3) 2133 stm${g} %r2,%r5,2*$SIZE_T($sp) 2134 la $key,0($key2) 2135 larl $tbl,AES_Te 2136 bras $ra,_s390x_AES_encrypt # generate the tweak 2137 lm${g} %r2,%r5,2*$SIZE_T($sp) 2138 larl $tbl,AES_Td 2139 lt${g}r $len,$len 2140 stm $s0,$s3,$tweak($sp) # save the tweak 2141 jz .Lxts_dec_short 2142 j .Lxts_dec_enter 2143 2144.align 16 2145.Lxts_dec_loop: 2146 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 2147 lrvg $s3,$tweak+8($sp) 2148 lghi %r1,0x87 2149 srag %r0,$s3,63 # broadcast upper bit 2150 ngr %r1,%r0 # rem 2151 algr $s1,$s1 2152 alcgr $s3,$s3 2153 xgr $s1,%r1 2154 lrvgr $s1,$s1 # flip byte order 2155 lrvgr $s3,$s3 2156 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 2157 stg $s1,$tweak+0($sp) # save the tweak 2158 llgfr $s1,$s1 2159 srlg $s2,$s3,32 2160 stg $s3,$tweak+8($sp) 2161 llgfr $s3,$s3 2162.Lxts_dec_enter: 2163 x $s0,0($inp) # tweak^=*(inp) 2164 x 
$s1,4($inp) 2165 x $s2,8($inp) 2166 x $s3,12($inp) 2167 stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing 2168 la $key,0($key1) 2169 bras $ra,_s390x_AES_decrypt 2170 lm${g} %r2,%r5,2*$SIZE_T($sp) 2171 x $s0,$tweak+0($sp) # ^=tweak 2172 x $s1,$tweak+4($sp) 2173 x $s2,$tweak+8($sp) 2174 x $s3,$tweak+12($sp) 2175 st $s0,0($out,$inp) 2176 st $s1,4($out,$inp) 2177 st $s2,8($out,$inp) 2178 st $s3,12($out,$inp) 2179 la $inp,16($inp) 2180 brct${g} $len,.Lxts_dec_loop 2181 2182 llgc $len,`2*$SIZE_T-1`($sp) 2183 nill $len,0x0f # $len%16 2184 jz .Lxts_dec_done 2185 2186 # generate pair of tweaks... 2187 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 2188 lrvg $s3,$tweak+8($sp) 2189 lghi %r1,0x87 2190 srag %r0,$s3,63 # broadcast upper bit 2191 ngr %r1,%r0 # rem 2192 algr $s1,$s1 2193 alcgr $s3,$s3 2194 xgr $s1,%r1 2195 lrvgr $i2,$s1 # flip byte order 2196 lrvgr $i3,$s3 2197 stmg $i2,$i3,$tweak($sp) # save the 1st tweak 2198 j .Lxts_dec_2ndtweak 2199 2200.align 16 2201.Lxts_dec_short: 2202 llgc $len,`2*$SIZE_T-1`($sp) 2203 nill $len,0x0f # $len%16 2204 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 2205 lrvg $s3,$tweak+8($sp) 2206.Lxts_dec_2ndtweak: 2207 lghi %r1,0x87 2208 srag %r0,$s3,63 # broadcast upper bit 2209 ngr %r1,%r0 # rem 2210 algr $s1,$s1 2211 alcgr $s3,$s3 2212 xgr $s1,%r1 2213 lrvgr $s1,$s1 # flip byte order 2214 lrvgr $s3,$s3 2215 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 2216 stg $s1,$tweak-16+0($sp) # save the 2nd tweak 2217 llgfr $s1,$s1 2218 srlg $s2,$s3,32 2219 stg $s3,$tweak-16+8($sp) 2220 llgfr $s3,$s3 2221 2222 x $s0,0($inp) # tweak_the_2nd^=*(inp) 2223 x $s1,4($inp) 2224 x $s2,8($inp) 2225 x $s3,12($inp) 2226 stm${g} %r2,%r3,2*$SIZE_T($sp) 2227 la $key,0($key1) 2228 bras $ra,_s390x_AES_decrypt 2229 lm${g} %r2,%r5,2*$SIZE_T($sp) 2230 x $s0,$tweak-16+0($sp) # ^=tweak_the_2nd 2231 x $s1,$tweak-16+4($sp) 2232 x $s2,$tweak-16+8($sp) 2233 x $s3,$tweak-16+12($sp) 2234 st $s0,0($out,$inp) 2235 st $s1,4($out,$inp) 2236 st 
$s2,8($out,$inp) 2237 st $s3,12($out,$inp) 2238 2239 la $i3,0($out,$inp) # put aside real $out 2240.Lxts_dec_steal: 2241 llgc %r0,16($inp) 2242 llgc %r1,0($out,$inp) 2243 stc %r0,0($out,$inp) 2244 stc %r1,16($out,$inp) 2245 la $inp,1($inp) 2246 brct $len,.Lxts_dec_steal 2247 la $out,0($i3) # restore real $out 2248 2249 lm $s0,$s3,$tweak($sp) # load the 1st tweak 2250 x $s0,0($out) # tweak^=*(inp)|stolen cipher-text 2251 x $s1,4($out) 2252 x $s2,8($out) 2253 x $s3,12($out) 2254 st${g} $out,4*$SIZE_T($sp) 2255 la $key,0($key1) 2256 bras $ra,_s390x_AES_decrypt 2257 l${g} $out,4*$SIZE_T($sp) 2258 x $s0,$tweak+0($sp) # ^=tweak 2259 x $s1,$tweak+4($sp) 2260 x $s2,$tweak+8($sp) 2261 x $s3,$tweak+12($sp) 2262 st $s0,0($out) 2263 st $s1,4($out) 2264 st $s2,8($out) 2265 st $s3,12($out) 2266 stg $sp,$tweak-16+0($sp) # wipe 2nd tweak 2267 stg $sp,$tweak-16+8($sp) 2268.Lxts_dec_done: 2269 stg $sp,$tweak+0($sp) # wipe tweak 2270 stg $sp,$tweak+8($sp) 2271 lm${g} %r6,$ra,6*$SIZE_T($sp) 2272 br $ra 2273.size AES_xts_decrypt,.-AES_xts_decrypt 2274___ 2275} 2276$code.=<<___; 2277.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>" 2278___ 2279 2280$code =~ s/\`([^\`]*)\`/eval $1/gem; 2281print $code; 2282close STDOUT or die "error closing STDOUT: $!"; # force flush 2283