#! /usr/bin/env perl
# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2009
#
# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
# "cluster" Address Generation Interlocks, so that one pipeline stall
# resolves several dependencies.

# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". Latter implies that the code
# remains z/Architecture specific. On z990 it was measured to perform
# 50% better than code generated by gcc 4.3.
# Command line: the first argument is the build flavour; the first later
# argument that looks like "name.ext" is taken as the output file name.
$flavour = shift;

# A flavour containing "31" or "32" selects 4-byte register save slots and
# the stm/lm instruction forms; any other flavour selects 8-byte slots and
# the stmg/lmg forms.
if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Consume arguments until one matches the "name.ext" pattern or the
# argument list is exhausted (in which case $output ends up false).
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an output name is actually present, and fail
# loudly if the file cannot be created, instead of silently emitting the
# assembly to whatever STDOUT happened to be.  The three-argument form
# keeps characters in the file name from being interpreted as an open mode.
if (defined($output) && $output ne "") {
	open STDOUT,">",$output or die "can't open $output: $!";
}

$rp="%r14";	# register used for the final "br" of RC4 and RC4_set_key
$sp="%r15";	# base register for the %r6.. save/restore slots

# $code accumulates the complete assembly text; it is printed at the end.
$code=<<___;
.text

___

# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
#
# As accessed by the code below, the key structure holds the x index in
# byte 0, the y index in byte 1 and the byte-wide state table from
# offset 2 onwards.
{
$acc="%r0";	# accumulates key-stream bytes
$cnt="%r1";	# number of 8-byte iterations (len>>3)
$key="%r2";
$len="%r3";
$inp="%r4";
$out="%r5";

# Two-entry register sets for the x index and the table byte loaded through
# it; they are swapped after every unrolled step.
@XX=("%r6","%r7");
@TX=("%r8","%r9");
$YY="%r10";	# y index
$TY="%r11";	# table byte loaded through y, then index of the output byte

$code.=<<___;
.globl	RC4
.type	RC4,\@function
.align	64
RC4:
	stm${g}	%r6,%r11,6*$SIZE_T($sp)
___
# Emitted for 31/32-bit flavours only, ahead of the 64-bit operations
# (srlg/ngr/brctg) that are applied to $len further down.
$code.=<<___ if ($flavour =~ /3[12]/);
	llgfr	$len,$len
___
$code.=<<___;
	llgc	$XX[0],0($key)
	llgc	$YY,1($key)
	la	$XX[0],1($XX[0])
	nill	$XX[0],0xff
	srlg	$cnt,$len,3
	ltgr	$cnt,$cnt
	llgc	$TX[0],2($XX[0],$key)
	jz	.Lshort
	j	.Loop8

.align	64
.Loop8:
___
# Body of .Loop8: eight unrolled steps.  From the second step on, each step
# first picks up the output byte of the previous step into $acc (a plain
# load on step 1, shift-and-insert afterwards), so that $acc holds eight
# key-stream bytes once the final insert after the loop has been emitted.
for ($i=0;$i<8;$i++) {
$code.=<<___;
	la	$YY,0($YY,$TX[0])	# $i
	nill	$YY,255
	la	$XX[1],1($XX[0])
	nill	$XX[1],255
___
$code.=<<___ if ($i==1);
	llgc	$acc,2($TY,$key)
___
$code.=<<___ if ($i>1);
	sllg	$acc,$acc,8
	ic	$acc,2($TY,$key)
___
# Swap the table bytes at x and y and preload the byte at the next x; when
# the next x equals y, the value just stored at y ($TX[0]) is used instead.
$code.=<<___;
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	llgc	$TX[1],2($XX[1],$key)
	stc	$TY,2($XX[0],$key)
	cr	$XX[1],$YY
	jne	.Lcmov$i
	la	$TX[1],0($TX[0])
.Lcmov$i:
	la	$TY,0($TY,$TX[0])
	nill	$TY,255
___
push(@TX,shift(@TX)); push(@XX,shift(@XX));	# "rotate" registers
}

# Tail of .Loop8 (xor eight input bytes with $acc and store them), the
# byte-at-a-time loop for the remaining len&7 bytes, and the epilogue that
# writes the x and y indices back into the key structure.
$code.=<<___;
	lg	$TX[1],0($inp)
	sllg	$acc,$acc,8
	la	$inp,8($inp)
	ic	$acc,2($TY,$key)
	xgr	$acc,$TX[1]
	stg	$acc,0($out)
	la	$out,8($out)
	brctg	$cnt,.Loop8

.Lshort:
	lghi	$acc,7
	ngr	$len,$acc
	jz	.Lexit
	j	.Loop1

.align	16
.Loop1:
	la	$YY,0($YY,$TX[0])
	nill	$YY,255
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	stc	$TY,2($XX[0],$key)
	ar	$TY,$TX[0]
	ahi	$XX[0],1
	nill	$TY,255
	nill	$XX[0],255
	llgc	$acc,0($inp)
	la	$inp,1($inp)
	llgc	$TY,2($TY,$key)
	llgc	$TX[0],2($XX[0],$key)
	xr	$acc,$TY
	stc	$acc,0($out)
	la	$out,1($out)
	brct	$len,.Loop1

.Lexit:
	ahi	$XX[0],-1
	stc	$XX[0],0($key)
	stc	$YY,1($key)
	lm${g}	%r6,%r11,6*$SIZE_T($sp)
	br	$rp
.size	RC4,.-RC4
.string	"RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"

___
}

# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
#
# .L1stloop zeroes the two index bytes and fills the 256-entry table with
# 0..255; .L2ndloop then walks the table once, mixing in the input bytes
# and restarting at the first input byte whenever len bytes have been used.
{
$cnt="%r0";	# loop counter (256, then remaining input bytes)
$idx="%r1";	# fill index, then running swap index
$key="%r2";
$len="%r3";
$inp="%r4";
$acc="%r5";	# table byte at the current position
$dat="%r6";	# input byte, then table byte at the swap index
$ikey="%r7";	# table position, counted from -256 up to 0
$iinp="%r8";	# offset into the input bytes

$code.=<<___;
.globl	RC4_set_key
.type	RC4_set_key,\@function
.align	64
RC4_set_key:
	stm${g}	%r6,%r8,6*$SIZE_T($sp)
	lhi	$cnt,256
	la	$idx,0(%r0)
	sth	$idx,0($key)
.align	4
.L1stloop:
	stc	$idx,2($idx,$key)
	la	$idx,1($idx)
	brct	$cnt,.L1stloop

	lghi	$ikey,-256
	lr	$cnt,$len
	la	$iinp,0(%r0)
	la	$idx,0(%r0)
.align	16
.L2ndloop:
	llgc	$acc,2+256($ikey,$key)
	llgc	$dat,0($iinp,$inp)
	la	$idx,0($idx,$acc)
	la	$ikey,1($ikey)
	la	$idx,0($idx,$dat)
	nill	$idx,255
	la	$iinp,1($iinp)
	tml	$ikey,255
	llgc	$dat,2($idx,$key)
	stc	$dat,2+256-1($ikey,$key)
	stc	$acc,2($idx,$key)
	jz	.Ldone
	brct	$cnt,.L2ndloop
	lr	$cnt,$len
	la	$iinp,0(%r0)
	j	.L2ndloop
.Ldone:
	lm${g}	%r6,%r8,6*$SIZE_T($sp)
	br	$rp
.size	RC4_set_key,.-RC4_set_key

___
}

# const char *RC4_options()
#
# Loads the address of the constant string "rc4(8x,char)" into %r2 and
# branches to %r14.
$code.=<<___;
.globl	RC4_options
.type	RC4_options,\@function
.align	16
RC4_options:
	larl	%r2,.Loptions
	br	%r14
.size	RC4_options,.-RC4_options
.section	.rodata
.Loptions:
.align	8
.string	"rc4(8x,char)"
___

print $code;
# Closing flushes the buffered output; a failure here (e.g. a full disk)
# would otherwise leave a truncated file behind without any diagnostic.
close STDOUT or die "error closing STDOUT: $!";