dnl  S/390-32 mpn_add_n and mpn_sub_n.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C            cycles/limb
C z900		 ?
C z990	      2.75-3		(fast for even n, slow for odd n)
C z9		 ?
C z10		 ?
C z196		 ?

C TODO
C  * Optimise for small n
C  * Use r0 and save/restore one less register
C  * Using logops_n's v1 inner loop operand order makes the loop about 20%
C    faster, at the expense of highly alignment-dependent performance.

C INPUT PARAMETERS
define(`rp',	`%r2')
define(`up',	`%r3')
define(`vp',	`%r4')
define(`n',	`%r5')

C Operation selection.  Exactly one of OPERATION_add_n / OPERATION_sub_n is
C expected to be defined by the build.  Each variant supplies:
C   ADSB    first limb operation, starts the carry/borrow chain
C   ADSBC   following limb operations, consume and produce carry/borrow
C   ADSBCR  register form of ADSBC (defined but not used in this file)
C   RETVAL  turns the final carry/borrow state into 0 or 1 in %r2
C   func    exported entry point name
C   func_nc name of the carry-in variant (defined but not emitted here)

ifdef(`OPERATION_add_n', `
  define(ADSB,		al)
  define(ADSBCR,	alcr)
  define(ADSBC,		alc)
  define(RETVAL,`dnl
	lhi	%r2, 0
	alcr	%r2, %r2')
  define(func,		mpn_add_n)
  define(func_nc,	mpn_add_nc)')
ifdef(`OPERATION_sub_n', `
  define(ADSB,		sl)
  define(ADSBCR,	slbr)
  define(ADSBC,		slb)
  define(RETVAL,`dnl
	slbr	%r2, %r2
	lcr	%r2, %r2')
  define(func,		mpn_sub_n)
  define(func_nc,	mpn_sub_nc)')

MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)

C ---------------------------------------------------------------------------
C func: limb-wise add (or subtract) of the n-limb operands at up and vp,
C storing the n-limb result at rp.  Limbs are 4 bytes wide.
C
C In:	rp (%r2)  destination pointer
C	up (%r3)  first source pointer
C	vp (%r4)  second source pointer
C	n  (%r5)  limb count
C Out:	%r2 = carry out (add) or borrow out (sub), 0 or 1
C
C Register use:
C	%r1       loop counter, (n+3)/4 passes including the lead-in
C	%r5-%r8   limb data (this overwrites n once %r1 holds the count)
C	%r6-%r8   saved at 24(%r15) on entry and reloaded before returning
C
C n is expected to be at least 1; n = 0 would take the 4-limb b0 path.
C
C The carry/borrow lives in the condition code across the whole chain, so
C only instructions that leave the condition code alone (lm, stm, l, st, la,
C brct, j) may sit between ADSB/ADSBC and RETVAL.
C ---------------------------------------------------------------------------

ASM_START()
PROLOGUE(func)
	stm	%r6, %r8, 24(%r15)	C save the registers used for limbs

C Split n into a pass count and a lead-in size, then dispatch.
	ahi	n, 3
	lhi	%r7, 3
	lr	%r1, n
	srl	%r1, 2			C r1 = (n+3)/4
	nr	%r7, n			C r7 = (n+3) mod 4
	je	L(b1)			C r7 = 0: lead-in of 1 limb
	chi	%r7, 2
	jl	L(b2)			C r7 = 1: lead-in of 2 limbs
	jne	L(b0)			C r7 = 3: lead-in of 4 limbs
					C r7 = 2: fall through, 3 limbs

C Lead-in of 3 limbs.
L(b3):	lm	%r5, %r7, 0(up)
	la	up, 12(up)
	ADSB	%r5, 0(vp)
	ADSBC	%r6, 4(vp)
	ADSBC	%r7, 8(vp)
	la	vp, 12(vp)
	stm	%r5, %r7, 0(rp)
	la	rp, 12(rp)
	brct	%r1, L(top)
	j	L(end)

C Lead-in of 4 limbs: handle the first two here, then join the main loop
C at its midpoint for the other two, the store and the pointer updates.
L(b0):	lm	%r5, %r8, 0(up)		C These redundant insns are no mistake,
	la	up, 16(up)		C they are needed to make the main loop
	ADSB	%r5, 0(vp)		C run fast for n = 0 (mod 4).
	ADSBC	%r6, 4(vp)
	j	L(m0)

C Lead-in of 1 limb.
L(b1):	l	%r5, 0(up)
	la	up, 4(up)
	ADSB	%r5, 0(vp)
	la	vp, 4(vp)
	st	%r5, 0(rp)
	la	rp, 4(rp)
	brct	%r1, L(top)
	j	L(end)

C Lead-in of 2 limbs.
L(b2):	lm	%r5, %r6, 0(up)
	la	up, 8(up)
	ADSB	%r5, 0(vp)
	ADSBC	%r6, 4(vp)
	la	vp, 8(vp)
	stm	%r5, %r6, 0(rp)
	la	rp, 8(rp)
	brct	%r1, L(top)
	j	L(end)

C Main loop: 4 limbs per pass, carry/borrow carried in from the previous
C pass or from the lead-in.
L(top):	lm	%r5, %r8, 0(up)
	la	up, 16(up)
	ADSBC	%r5, 0(vp)
	ADSBC	%r6, 4(vp)
L(m0):	ADSBC	%r7, 8(vp)
	ADSBC	%r8, 12(vp)
	la	vp, 16(vp)
	stm	%r5, %r8, 0(rp)
	la	rp, 16(rp)
	brct	%r1, L(top)

C Materialise the carry/borrow as the return value and restore registers.
L(end):	RETVAL
	lm	%r6, %r8, 24(%r15)
	br	%r14
EPILOGUE()