dnl  Alpha mpn_lshift -- Shift a number left.

dnl  Copyright 1994, 1995, 2000, 2003, 2009 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     3.25
C EV6:     1.75

C  INPUT PARAMETERS
C  rp	r16
C  up	r17
C  n	r18
C  cnt	r19

C  OVERVIEW
C  Shifts the n-limb operand at up left by cnt bits and stores n result limbs
C  at rp.  The bits shifted out of the most significant limb are left in r0
C  as the function result.  Limbs are processed from the highest address
C  down to the lowest.
C
C  Each result limb is (up[i] << cnt) | (up[i-1] >> (64-cnt)); the least
C  significant result limb is just up[0] << cnt.
C
C  Structure:
C    1. L(top0) handles (n-1) mod 4 limbs, one per iteration.
C    2. The remaining n-1 limbs (a multiple of 4) go through a software
C       pipelined loop, L(top), that loads and stores 4 limbs per iteration,
C       with separate warm up and cool down code.
C    3. Every exit path finishes by storing the pending left-shifted part
C       kept in r24 as the least significant result limb.
C
C  Register roles:
C    r16	result pointer, moves downwards
C    r17	source pointer, moves downwards
C    r18	limbs still to be fetched by the 4-way code
C    r19	cnt, used as the left shift count
C    r20	0 - cnt, used as the right shift count
C    r28	iteration counter for L(top0)
C    r0	function result
C    r1-r4	source limbs
C    r21-r24	left-shifted parts (r24 is carried between groups of 4)
C    r5-r8	right-shifted parts, then the combined result limbs
C
C  NOTE(review): using 0 - cnt as a right shift count relies on srl using
C  only the low 6 bits of the count, so that it acts as 64-cnt, and on r31
C  reading as zero.  Both are Alpha architecture properties and not visible
C  in this file.
C  NOTE(review): the first limb is loaded unconditionally, so n >= 1 is
C  assumed.  cnt is not range checked here; presumably 1 <= cnt <= 63 is
C  required.  Confirm against the callers.

ASM_START()
PROLOGUE(mpn_lshift)
	s8addq	r18,r17,r17	C make r17 point at end of s1 (up + 8*n)
	ldq	r4,-8(r17)	C load first limb (the most significant one)
	subq	r31,r19,r20	C r20 = 0 - cnt, the right shift count
	s8addq	r18,r16,r16	C make r16 point at end of RES (rp + 8*n)
	subq	r18,1,r18	C r18 = n - 1
	and	r18,4-1,r28	C number of limbs in first loop, (n-1) mod 4
	srl	r4,r20,r0	C compute function result

	beq	r28,L(L0)	C n-1 already a multiple of 4, skip first loop
	subq	r18,r28,r18	C r18 = limbs left for the 4-way code

	ALIGN(8)
C first loop: one result limb per iteration, r28 iterations
C on entry to each iteration r4 holds the limb at -8(r17)
L(top0):
	ldq	r3,-16(r17)	C next lower source limb
	subq	r16,8,r16
	sll	r4,r19,r5	C upper part from the current limb
	subq	r17,8,r17
	subq	r28,1,r28
	srl	r3,r20,r6	C lower part from the next lower limb
	bis	r3,r3,r4	C r4 = r3, becomes the current limb
	bis	r5,r6,r8	C combine the two parts
	stq	r8,0(r16)	C store at the already decremented r16
	bne	r28,L(top0)

L(L0):	sll	r4,r19,r24	C pending upper part of the next result limb
	beq	r18,L(end)	C nothing left, r24 is the last result limb
C warm up phase 1
C fetch the first group of 4 source limbs
	ldq	r1,-16(r17)
	subq	r18,4,r18
	ldq	r2,-24(r17)
	ldq	r3,-32(r17)
	ldq	r4,-40(r17)
C warm up phase 2
C start shifting the first group; if it was the only group go to L(end1)
	srl	r1,r20,r7
	sll	r1,r19,r21
	srl	r2,r20,r8
	beq	r18,L(end1)
C otherwise fetch the second group while finishing the first one
	ldq	r1,-48(r17)
	sll	r2,r19,r22
	ldq	r2,-56(r17)
	srl	r3,r20,r5
	bis	r7,r24,r7
	sll	r3,r19,r23
	bis	r8,r21,r8
	srl	r4,r20,r6
	ldq	r3,-64(r17)
	sll	r4,r19,r24
	ldq	r4,-72(r17)
	subq	r18,4,r18
	beq	r18,L(end2)	C exactly two groups, skip the main loop
	ALIGN(16)
C main loop
C each iteration stores 4 result limbs, shifts the 4 limbs already loaded
C and loads 4 more; r16 and r17 both move down by 32 bytes
C NOTE(review): the unop instructions are presumably scheduling filler that
C keeps the four-instruction groups intact; the text after C on those lines
C is only a comment
L(top):	stq	r7,-8(r16)
	bis	r5,r22,r5
	stq	r8,-16(r16)
	bis	r6,r23,r6

	srl	r1,r20,r7
	subq	r18,4,r18
	sll	r1,r19,r21
	unop	C ldq	r31,-96(r17)

	srl	r2,r20,r8
	ldq	r1,-80(r17)
	sll	r2,r19,r22
	ldq	r2,-88(r17)

	stq	r5,-24(r16)
	bis	r7,r24,r7
	stq	r6,-32(r16)
	bis	r8,r21,r8

	srl	r3,r20,r5
	unop	C ldq	r31,-96(r17)
	sll	r3,r19,r23
	subq	r16,32,r16

	srl	r4,r20,r6
	ldq	r3,-96(r17)
	sll	r4,r19,r24
	ldq	r4,-104(r17)

	subq	r17,32,r17
	bne	r18,L(top)
C cool down phase 2/1
C no more loads: store the group in flight and shift the last loaded group
L(end2):
	stq	r7,-8(r16)
	bis	r5,r22,r5
	stq	r8,-16(r16)
	bis	r6,r23,r6
	srl	r1,r20,r7
	sll	r1,r19,r21
	srl	r2,r20,r8
	sll	r2,r19,r22
	stq	r5,-24(r16)
	bis	r7,r24,r7
	stq	r6,-32(r16)
	bis	r8,r21,r8
	srl	r3,r20,r5
	sll	r3,r19,r23
	srl	r4,r20,r6
	sll	r4,r19,r24
C cool down phase 2/2
C store the last group of 4 combined limbs
	stq	r7,-40(r16)
	bis	r5,r22,r5
	stq	r8,-48(r16)
	bis	r6,r23,r6
	stq	r5,-56(r16)
	stq	r6,-64(r16)
C cool down phase 2/3
C least significant result limb, only the left-shifted part
	stq	r24,-72(r16)
	ret	r31,(r26),1	C return through the address in r26

C cool down phase 1/1
C reached when only one group of 4 limbs was loaded; finish shifting it
L(end1):
	sll	r2,r19,r22
	srl	r3,r20,r5
	bis	r7,r24,r7
	sll	r3,r19,r23
	bis	r8,r21,r8
	srl	r4,r20,r6
	sll	r4,r19,r24
C cool down phase 1/2
C store the 4 combined limbs, then the least significant result limb
	stq	r7,-8(r16)
	bis	r5,r22,r5
	stq	r8,-16(r16)
	bis	r6,r23,r6
	stq	r5,-24(r16)
	stq	r6,-32(r16)
	stq	r24,-40(r16)
	ret	r31,(r26),1	C return through the address in r26

C no limbs left for the 4-way code: r24 is the least significant result limb
L(end):	stq	r24,-8(r16)
	ret	r31,(r26),1	C return through the address in r26
EPILOGUE(mpn_lshift)
ASM_END()