#!/usr/bin/env perl
# Copyright 2017-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# Keccak-1600 for s390x.
#
# June 2017.
#
# Below code is [lane complementing] KECCAK_2X implementation (see
# sha/keccak1600.c) with C[5] and D[5] held in register bank. Though
# instead of actually unrolling the loop pair-wise I simply flip
# pointers to T[][] and A[][] at the end of round. Since number of
# rounds is even, last round writes to A[][] and everything works out.
# In the nutshell it's transliteration of x86_64 module, because both
# architectures have similar capabilities/limitations. Performance
# measurement is problematic as I don't have access to an idle system.
# It looks like z13 processes one byte [out of long message] in ~14
# cycles. At least the result is consistent with estimate based on
# amount of instruction and assumed instruction issue rate. It's ~2.5x
# faster than compiler-generated code.

use strict;

# Usage: perl keccak1600-s390x.pl [flavour] [output-file]
#
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output  = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# $SIZE_T is the pointer/register-save slot size in bytes, $g is the
# mnemonic infix/suffix that selects the 64-bit form of an instruction
# (e.g. "st${g}" becomes "stg"); a flavour matching /3[12]/ selects the
# 4-byte variant with the plain mnemonics.
my ($SIZE_T, $g);
if (defined($flavour) && $flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Redirect STDOUT to the requested file.  Three-argument open keeps the
# file name from being interpreted as a mode, and a failure is fatal
# instead of silently sending the assembly to the inherited STDOUT.
if ($output) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}

# Accumulator for the generated assembly text.
my $code = "";

# Byte offsets of the 5x5 64-bit lanes: $A[y][x] == 8*(5*y+x).
my @A = map([ 8*$_, 8*($_+1), 8*($_+2), 8*($_+3), 8*($_+4) ], (0,5,10,15,20));

# Register assignment used by the round function.
my @C = map("%r$_",(0,1,5..7));
my @D = map("%r$_",(8..12));
my @T = map("%r$_",(13..14));
my ($src,$dst,$iotas) = map("%r$_",(2..4));
my $sp = "%r15";

# $stdframe is the fixed part of the stack frame, $frame adds room for
# a second 25-lane state (the T[][] buffer that $dst points at).
my $stdframe=16*$SIZE_T+4*8;
my $frame=$stdframe+25*8;

# Rotation amounts, indexed as $rhotates[y][x].
my @rhotates = ([  0,  1, 62, 28, 27 ],
                [ 36, 44,  6, 55, 20 ],
                [  3, 10, 43, 25, 39 ],
                [ 41, 45, 15, 21,  8 ],
                [ 18,  2, 61, 56, 14 ]);

# __KeccakF1600: internal round loop.  Reads the state at $src, writes
# the next state at $dst and swaps the two pointers at the end of every
# round.  The loop terminates when the low 8 bits of the advancing
# $iotas pointer become zero (see the layout note above the iotas table
# at the bottom of this file).
{ my @C = @C;	# copy, because we mess them up...
  my @D = @D;

$code.=<<___;
.text

.type	__KeccakF1600,\@function
.align	32
__KeccakF1600:
	st${g}	%r14,$SIZE_T*14($sp)
	lg	@C[0],$A[4][0]($src)
	lg	@C[1],$A[4][1]($src)
	lg	@C[2],$A[4][2]($src)
	lg	@C[3],$A[4][3]($src)
	lg	@C[4],$A[4][4]($src)
	larl	$iotas,iotas
	j	.Loop

.align	16
.Loop:
	lg	@D[0],$A[0][0]($src)
	lg	@D[1],$A[1][1]($src)
	lg	@D[2],$A[2][2]($src)
	lg	@D[3],$A[3][3]($src)

	xgr	@C[0],@D[0]
	xg	@C[1],$A[0][1]($src)
	xg	@C[2],$A[0][2]($src)
	xg	@C[3],$A[0][3]($src)
	lgr	@D[4],@C[4]
	xg	@C[4],$A[0][4]($src)

	xg	@C[0],$A[1][0]($src)
	xgr	@C[1],@D[1]
	xg	@C[2],$A[1][2]($src)
	xg	@C[3],$A[1][3]($src)
	xg	@C[4],$A[1][4]($src)

	xg	@C[0],$A[2][0]($src)
	xg	@C[1],$A[2][1]($src)
	xgr	@C[2],@D[2]
	xg	@C[3],$A[2][3]($src)
	xg	@C[4],$A[2][4]($src)

	xg	@C[0],$A[3][0]($src)
	xg	@C[1],$A[3][1]($src)
	xg	@C[2],$A[3][2]($src)
	xgr	@C[3],@D[3]
	xg	@C[4],$A[3][4]($src)

	lgr	@T[0],@C[2]
	rllg	@C[2],@C[2],1
	xgr	@C[2],@C[0]		# D[1] = ROL64(C[2], 1) ^ C[0]

	rllg	@C[0],@C[0],1
	xgr	@C[0],@C[3]		# D[4] = ROL64(C[0], 1) ^ C[3]

	rllg	@C[3],@C[3],1
	xgr	@C[3],@C[1]		# D[2] = ROL64(C[3], 1) ^ C[1]

	rllg	@C[1],@C[1],1
	xgr	@C[1],@C[4]		# D[0] = ROL64(C[1], 1) ^ C[4]

	rllg	@C[4],@C[4],1
	xgr	@C[4],@T[0]		# D[3] = ROL64(C[4], 1) ^ C[2]
___
	# Rename registers: the values just computed in @C become D[0..4],
	# and the diagonal lanes loaded into @D become the new working @C.
	(@D[0..4], @C) = (@C[1..4,0], @D);
$code.=<<___;
	xgr	@C[1],@D[1]
	xgr	@C[2],@D[2]
	xgr	@C[3],@D[3]
	rllg	@C[1],@C[1],$rhotates[1][1]
	xgr	@C[4],@D[4]
	rllg	@C[2],@C[2],$rhotates[2][2]
	xgr	@C[0],@D[0]

	lgr	@T[0],@C[1]
	ogr	@C[1],@C[2]
	rllg	@C[3],@C[3],$rhotates[3][3]
	xgr	@C[1],@C[0]		#	    C[0] ^ ( C[1] | C[2])
	rllg	@C[4],@C[4],$rhotates[4][4]
	xg	@C[1],0($iotas)
	la	$iotas,8($iotas)
	stg	@C[1],$A[0][0]($dst)	# R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i]

	lgr	@T[1],@C[4]
	ngr	@C[4],@C[3]
	lghi	@C[1],-1		# no 'not' instruction :-(
	xgr	@C[4],@C[2]		#	    C[2] ^ ( C[4] & C[3])
	xgr	@C[2],@C[1]		# not	@C[2]
	stg	@C[4],$A[0][2]($dst)	# R[0][2] = C[2] ^ ( C[4] & C[3])
	ogr	@C[2],@C[3]
	xgr	@C[2],@T[0]		#	    C[1] ^ (~C[2] | C[3])

	ngr	@T[0],@C[0]
	stg	@C[2],$A[0][1]($dst)	# R[0][1] = C[1] ^ (~C[2] | C[3])
	xgr	@T[0],@T[1]		#	    C[4] ^ ( C[1] & C[0])
	ogr	@T[1],@C[0]
	stg	@T[0],$A[0][4]($dst)	# R[0][4] = C[4] ^ ( C[1] & C[0])
	xgr	@T[1],@C[3]		#	    C[3] ^ ( C[4] | C[0])
	stg	@T[1],$A[0][3]($dst)	# R[0][3] = C[3] ^ ( C[4] | C[0])


	lg	@C[0],$A[0][3]($src)
	lg	@C[4],$A[4][2]($src)
	lg	@C[3],$A[3][1]($src)
	lg	@C[1],$A[1][4]($src)
	lg	@C[2],$A[2][0]($src)

	xgr	@C[0],@D[3]
	xgr	@C[4],@D[2]
	rllg	@C[0],@C[0],$rhotates[0][3]
	xgr	@C[3],@D[1]
	rllg	@C[4],@C[4],$rhotates[4][2]
	xgr	@C[1],@D[4]
	rllg	@C[3],@C[3],$rhotates[3][1]
	xgr	@C[2],@D[0]

	lgr	@T[0],@C[0]
	ogr	@C[0],@C[4]
	rllg	@C[1],@C[1],$rhotates[1][4]
	xgr	@C[0],@C[3]		#	    C[3] ^ (C[0] |  C[4])
	rllg	@C[2],@C[2],$rhotates[2][0]
	stg	@C[0],$A[1][3]($dst)	# R[1][3] = C[3] ^ (C[0] |  C[4])

	lgr	@T[1],@C[1]
	ngr	@C[1],@T[0]
	lghi	@C[0],-1		# no 'not' instruction :-(
	xgr	@C[1],@C[4]		#	    C[4] ^ (C[1] &  C[0])
	xgr	@C[4],@C[0]		# not	@C[4]
	stg	@C[1],$A[1][4]($dst)	# R[1][4] = C[4] ^ (C[1] &  C[0])

	ogr	@C[4],@C[3]
	xgr	@C[4],@C[2]		#	    C[2] ^ (~C[4] | C[3])

	ngr	@C[3],@C[2]
	stg	@C[4],$A[1][2]($dst)	# R[1][2] = C[2] ^ (~C[4] | C[3])
	xgr	@C[3],@T[1]		#	    C[1] ^ (C[3] &  C[2])
	ogr	@T[1],@C[2]
	stg	@C[3],$A[1][1]($dst)	# R[1][1] = C[1] ^ (C[3] &  C[2])
	xgr	@T[1],@T[0]		#	    C[0] ^ (C[1] |  C[2])
	stg	@T[1],$A[1][0]($dst)	# R[1][0] = C[0] ^ (C[1] |  C[2])


	lg	@C[2],$A[2][3]($src)
	lg	@C[3],$A[3][4]($src)
	lg	@C[1],$A[1][2]($src)
	lg	@C[4],$A[4][0]($src)
	lg	@C[0],$A[0][1]($src)

	xgr	@C[2],@D[3]
	xgr	@C[3],@D[4]
	rllg	@C[2],@C[2],$rhotates[2][3]
	xgr	@C[1],@D[2]
	rllg	@C[3],@C[3],$rhotates[3][4]
	xgr	@C[4],@D[0]
	rllg	@C[1],@C[1],$rhotates[1][2]
	xgr	@C[0],@D[1]

	lgr	@T[0],@C[2]
	ngr	@C[2],@C[3]
	rllg	@C[4],@C[4],$rhotates[4][0]
	xgr	@C[2],@C[1]		#	     C[1] ^ ( C[2] & C[3])
	lghi	@T[1],-1		# no 'not' instruction :-(
	stg	@C[2],$A[2][1]($dst)	# R[2][1] =  C[1] ^ ( C[2] & C[3])

	xgr	@C[3],@T[1]		# not	@C[3]
	lgr	@T[1],@C[4]
	ngr	@C[4],@C[3]
	rllg	@C[0],@C[0],$rhotates[0][1]
	xgr	@C[4],@T[0]		#	     C[2] ^ ( C[4] & ~C[3])
	ogr	@T[0],@C[1]
	stg	@C[4],$A[2][2]($dst)	# R[2][2] =  C[2] ^ ( C[4] & ~C[3])
	xgr	@T[0],@C[0]		#	     C[0] ^ ( C[2] | C[1])

	ngr	@C[1],@C[0]
	stg	@T[0],$A[2][0]($dst)	# R[2][0] =  C[0] ^ ( C[2] | C[1])
	xgr	@C[1],@T[1]		#	     C[4] ^ ( C[1] & C[0])
	ogr	@C[0],@T[1]
	stg	@C[1],$A[2][4]($dst)	# R[2][4] =  C[4] ^ ( C[1] & C[0])
	xgr	@C[0],@C[3]		#	    ~C[3] ^ ( C[0] | C[4])
	stg	@C[0],$A[2][3]($dst)	# R[2][3] = ~C[3] ^ ( C[0] | C[4])


	lg	@C[2],$A[2][1]($src)
	lg	@C[3],$A[3][2]($src)
	lg	@C[1],$A[1][0]($src)
	lg	@C[4],$A[4][3]($src)
	lg	@C[0],$A[0][4]($src)

	xgr	@C[2],@D[1]
	xgr	@C[3],@D[2]
	rllg	@C[2],@C[2],$rhotates[2][1]
	xgr	@C[1],@D[0]
	rllg	@C[3],@C[3],$rhotates[3][2]
	xgr	@C[4],@D[3]
	rllg	@C[1],@C[1],$rhotates[1][0]
	xgr	@C[0],@D[4]
	rllg	@C[4],@C[4],$rhotates[4][3]

	lgr	@T[0],@C[2]
	ogr	@C[2],@C[3]
	lghi	@T[1],-1		# no 'not' instruction :-(
	xgr	@C[2],@C[1]		#	     C[1] ^ ( C[2] | C[3])
	xgr	@C[3],@T[1]		# not	@C[3]
	stg	@C[2],$A[3][1]($dst)	# R[3][1] =  C[1] ^ ( C[2] | C[3])

	lgr	@T[1],@C[4]
	ogr	@C[4],@C[3]
	rllg	@C[0],@C[0],$rhotates[0][4]
	xgr	@C[4],@T[0]		#	     C[2] ^ ( C[4] | ~C[3])
	ngr	@T[0],@C[1]
	stg	@C[4],$A[3][2]($dst)	# R[3][2] =  C[2] ^ ( C[4] | ~C[3])
	xgr	@T[0],@C[0]		#	     C[0] ^ ( C[2] & C[1])

	ogr	@C[1],@C[0]
	stg	@T[0],$A[3][0]($dst)	# R[3][0] =  C[0] ^ ( C[2] & C[1])
	xgr	@C[1],@T[1]		#	     C[4] ^ ( C[1] | C[0])
	ngr	@C[0],@T[1]
	stg	@C[1],$A[3][4]($dst)	# R[3][4] =  C[4] ^ ( C[1] | C[0])
	xgr	@C[0],@C[3]		#	    ~C[3] ^ ( C[0] & C[4])
	stg	@C[0],$A[3][3]($dst)	# R[3][3] = ~C[3] ^ ( C[0] & C[4])


	xg	@D[2],$A[0][2]($src)
	xg	@D[3],$A[1][3]($src)
	xg	@D[1],$A[4][1]($src)
	xg	@D[4],$A[2][4]($src)
	xgr	$dst,$src		# xchg	$dst,$src
	rllg	@D[2],@D[2],$rhotates[0][2]
	xg	@D[0],$A[3][0]($src)
	rllg	@D[3],@D[3],$rhotates[1][3]
	xgr	$src,$dst
	rllg	@D[1],@D[1],$rhotates[4][1]
	xgr	$dst,$src
	rllg	@D[4],@D[4],$rhotates[2][4]
___
	# The last row is computed in place in the @D registers; alias
	# them as @C so the row formulas below read like the others.
	@C = @D[2..4,0,1];
$code.=<<___;
	lgr	@T[0],@C[0]
	ngr	@C[0],@C[1]
	lghi	@T[1],-1		# no 'not' instruction :-(
	xgr	@C[0],@C[4]		#	     C[4] ^ ( C[0] & C[1])
	xgr	@C[1],@T[1]		# not	@C[1]
	stg	@C[0],$A[4][4]($src)	# R[4][4] =  C[4] ^ ( C[0] & C[1])

	lgr	@T[1],@C[2]
	ngr	@C[2],@C[1]
	rllg	@D[0],@D[0],$rhotates[3][0]
	xgr	@C[2],@T[0]		#	     C[0] ^ ( C[2] & ~C[1])
	ogr	@T[0],@C[4]
	stg	@C[2],$A[4][0]($src)	# R[4][0] =  C[0] ^ ( C[2] & ~C[1])
	xgr	@T[0],@C[3]		#	     C[3] ^ ( C[0] | C[4])

	ngr	@C[4],@C[3]
	stg	@T[0],$A[4][3]($src)	# R[4][3] =  C[3] ^ ( C[0] | C[4])
	xgr	@C[4],@T[1]		#	     C[2] ^ ( C[4] & C[3])
	ogr	@C[3],@T[1]
	stg	@C[4],$A[4][2]($src)	# R[4][2] =  C[2] ^ ( C[4] & C[3])
	xgr	@C[3],@C[1]		#	    ~C[1] ^ ( C[2] | C[3])

	lgr	@C[1],@C[0]		# harmonize with the loop top
	lgr	@C[0],@T[0]
	stg	@C[3],$A[4][1]($src)	# R[4][1] = ~C[1] ^ ( C[2] | C[3])

	tmll	$iotas,255
	jnz	.Loop

	l${g}	%r14,$SIZE_T*14($sp)
	br	%r14
.size	__KeccakF1600,.-__KeccakF1600
___
}
# KeccakF1600: stand-alone permutation entry (also reachable through
# the local label .LKeccakF1600).  It allocates the frame, complements
# lanes A[0][1], A[0][2], A[1][3], A[2][2], A[3][2] and A[4][0], runs
# __KeccakF1600 with $dst pointing at the on-stack buffer, and then
# complements the same six lanes again.
{
$code.=<<___;
.type	KeccakF1600,\@function
.align	32
KeccakF1600:
.LKeccakF1600:
	lghi	%r1,-$frame
	stm${g}	%r6,%r15,$SIZE_T*6($sp)
	lgr	%r0,$sp
	la	$sp,0(%r1,$sp)
	st${g}	%r0,0($sp)

	lghi	@D[0],-1		# no 'not' instruction :-(
	lghi	@D[1],-1
	lghi	@D[2],-1
	lghi	@D[3],-1
	lghi	@D[4],-1
	lghi	@T[0],-1
	xg	@D[0],$A[0][1]($src)
	xg	@D[1],$A[0][2]($src)
	xg	@D[2],$A[1][3]($src)
	xg	@D[3],$A[2][2]($src)
	xg	@D[4],$A[3][2]($src)
	xg	@T[0],$A[4][0]($src)
	stmg	@D[0],@D[1],$A[0][1]($src)
	stg	@D[2],$A[1][3]($src)
	stg	@D[3],$A[2][2]($src)
	stg	@D[4],$A[3][2]($src)
	stg	@T[0],$A[4][0]($src)

	la	$dst,$stdframe($sp)

	bras	%r14,__KeccakF1600

	lghi	@D[0],-1		# no 'not' instruction :-(
	lghi	@D[1],-1
	lghi	@D[2],-1
	lghi	@D[3],-1
	lghi	@D[4],-1
	lghi	@T[0],-1
	xg	@D[0],$A[0][1]($src)
	xg	@D[1],$A[0][2]($src)
	xg	@D[2],$A[1][3]($src)
	xg	@D[3],$A[2][2]($src)
	xg	@D[4],$A[3][2]($src)
	xg	@T[0],$A[4][0]($src)
	stmg	@D[0],@D[1],$A[0][1]($src)
	stg	@D[2],$A[1][3]($src)
	stg	@D[3],$A[2][2]($src)
	stg	@D[4],$A[3][2]($src)
	stg	@T[0],$A[4][0]($src)

	lm${g}	%r6,%r15,$frame+6*$SIZE_T($sp)
	br	%r14
.size	KeccakF1600,.-KeccakF1600
___
}
# SHA3_absorb: arguments arrive in %r2..%r5 as (A_flat, inp, len, bsz).
# While len >= bsz it XORs bsz bytes of byte-reversed input (lrvg) into
# the state, 8 bytes at a time, and runs __KeccakF1600.  The value left
# in len (the unprocessed remainder) is returned in %r2.
{ my ($A_flat,$inp,$len,$bsz) = map("%r$_",(2..5));

$code.=<<___;
.globl	SHA3_absorb
.type	SHA3_absorb,\@function
.align	32
SHA3_absorb:
	lghi	%r1,-$frame
	stm${g}	%r5,%r15,$SIZE_T*5($sp)
	lgr	%r0,$sp
	la	$sp,0(%r1,$sp)
	st${g}	%r0,0($sp)

	lghi	@D[0],-1		# no 'not' instruction :-(
	lghi	@D[1],-1
	lghi	@D[2],-1
	lghi	@D[3],-1
	lghi	@D[4],-1
	lghi	@T[0],-1
	xg	@D[0],$A[0][1]($src)
	xg	@D[1],$A[0][2]($src)
	xg	@D[2],$A[1][3]($src)
	xg	@D[3],$A[2][2]($src)
	xg	@D[4],$A[3][2]($src)
	xg	@T[0],$A[4][0]($src)
	stmg	@D[0],@D[1],$A[0][1]($src)
	stg	@D[2],$A[1][3]($src)
	stg	@D[3],$A[2][2]($src)
	stg	@D[4],$A[3][2]($src)
	stg	@T[0],$A[4][0]($src)

.Loop_absorb:
	cl${g}r	$len,$bsz
	jl	.Ldone_absorb

	srl${g}	$bsz,3
	la	%r1,0($A_flat)

.Lblock_absorb:
	lrvg	%r0,0($inp)
	la	$inp,8($inp)
	xg	%r0,0(%r1)
	a${g}hi	$len,-8
	stg	%r0,0(%r1)
	la	%r1,8(%r1)
	brct	$bsz,.Lblock_absorb

	stm${g}	$inp,$len,$frame+3*$SIZE_T($sp)
	la	$dst,$stdframe($sp)
	bras	%r14,__KeccakF1600
	lm${g}	$inp,$bsz,$frame+3*$SIZE_T($sp)
	j	.Loop_absorb

.align	16
.Ldone_absorb:
	lghi	@D[0],-1		# no 'not' instruction :-(
	lghi	@D[1],-1
	lghi	@D[2],-1
	lghi	@D[3],-1
	lghi	@D[4],-1
	lghi	@T[0],-1
	xg	@D[0],$A[0][1]($src)
	xg	@D[1],$A[0][2]($src)
	xg	@D[2],$A[1][3]($src)
	xg	@D[3],$A[2][2]($src)
	xg	@D[4],$A[3][2]($src)
	xg	@T[0],$A[4][0]($src)
	stmg	@D[0],@D[1],$A[0][1]($src)
	stg	@D[2],$A[1][3]($src)
	stg	@D[3],$A[2][2]($src)
	stg	@D[4],$A[3][2]($src)
	stg	@T[0],$A[4][0]($src)

	lgr	%r2,$len		# return value

	lm${g}	%r6,%r15,$frame+6*$SIZE_T($sp)
	br	%r14
.size	SHA3_absorb,.-SHA3_absorb
___
}
# SHA3_squeeze: arguments arrive in %r2..%r5 as (A_flat, out, len, bsz).
# Copies byte-reversed lanes (lrvg) to out 8 bytes at a time, calling
# .LKeccakF1600 each time bsz bytes have been produced; fewer than 8
# remaining bytes are stored one at a time, low-order byte first.
{ my ($A_flat,$out,$len,$bsz) = map("%r$_",(2..5));

$code.=<<___;
.globl	SHA3_squeeze
.type	SHA3_squeeze,\@function
.align	32
SHA3_squeeze:
	srl${g}	$bsz,3
	st${g}	%r14,2*$SIZE_T($sp)
	lghi	%r14,8
	st${g}	$bsz,5*$SIZE_T($sp)
	la	%r1,0($A_flat)

	j	.Loop_squeeze

.align	16
.Loop_squeeze:
	cl${g}r	$len,%r14
	jl	.Ltail_squeeze

	lrvg	%r0,0(%r1)
	la	%r1,8(%r1)
	stg	%r0,0($out)
	la	$out,8($out)
	a${g}hi	$len,-8			# len -= 8
	jz	.Ldone_squeeze

	brct	$bsz,.Loop_squeeze	# bsz--

	stm${g}	$out,$len,3*$SIZE_T($sp)
	bras	%r14,.LKeccakF1600
	lm${g}	$out,$bsz,3*$SIZE_T($sp)
	lghi	%r14,8
	la	%r1,0($A_flat)
	j	.Loop_squeeze

.Ltail_squeeze:
	lg	%r0,0(%r1)
.Loop_tail_squeeze:
	stc	%r0,0($out)
	la	$out,1($out)
	srlg	%r0,8
	brct	$len,.Loop_tail_squeeze

.Ldone_squeeze:
	l${g}	%r14,2*$SIZE_T($sp)
	br	%r14
.size	SHA3_squeeze,.-SHA3_squeeze
___
}
# Round constants.  Layout matters: after the 256-byte alignment come
# 64 bytes of zero padding and then 24 eight-byte constants, so the
# table ends exactly on a 256-byte boundary.  __KeccakF1600 relies on
# this: it advances its table pointer by 8 per round and leaves the
# loop once "tmll ...,255" finds the low 8 bits of the pointer zero.
$code.=<<___;
.align	256
	.quad	0,0,0,0,0,0,0,0
.type	iotas,\@object
iotas:
	.quad	0x0000000000000001
	.quad	0x0000000000008082
	.quad	0x800000000000808a
	.quad	0x8000000080008000
	.quad	0x000000000000808b
	.quad	0x0000000080000001
	.quad	0x8000000080008081
	.quad	0x8000000000008009
	.quad	0x000000000000008a
	.quad	0x0000000000000088
	.quad	0x0000000080008009
	.quad	0x000000008000000a
	.quad	0x000000008000808b
	.quad	0x800000000000008b
	.quad	0x8000000000008089
	.quad	0x8000000000008003
	.quad	0x8000000000008002
	.quad	0x8000000000000080
	.quad	0x000000000000800a
	.quad	0x800000008000000a
	.quad	0x8000000080008081
	.quad	0x8000000000008080
	.quad	0x0000000080000001
	.quad	0x8000000080008008
.size	iotas,.-iotas
.asciz	"Keccak-1600 absorb and squeeze for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___

# unlike 32-bit shift 64-bit one takes three arguments
$code =~ s/(srlg\s+)(%r[0-9]+),/$1$2,$2,/gm;

print $code;
close STDOUT or die "error closing STDOUT: $!";