dnl  Alpha mpn_rshift -- Shift a number right.

dnl  Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     3.25
C EV6:     1.75

C  INPUT PARAMETERS
C  rp	r16
C  up	r17
C  n	r18
C  cnt	r19

C  mpn_rshift shifts the n-limb number at up right by cnt bits and stores
C  the n result limbs at rp.  The value returned in r0 is the low source
C  limb shifted left by the complementary count, i.e. the bits that fall
C  off the low end.
C
C  Every result limb except the top one combines two neighbouring source
C  limbs:  rp[i] = (up[i] >> cnt) | (up[i+1] << -cnt).  The top result limb
C  is just the top source limb shifted right.
C
C  NOTE(review): the first limb is loaded unconditionally, so n >= 1 is
C  assumed.  cnt is presumably in the range 1..63; with cnt = 0 both shift
C  counts would be 0 and the two halves would be OR-ed unshifted -- confirm
C  against the documented mpn_rshift contract.
C
C  Register use:
C    r20	  -cnt, used as the left shift count
C    r28	  (n-1) mod 4, trip count of the one-limb-per-pass loop
C    r18	  n-1 at first, then the remaining multiple-of-4 limb count
C    r1-r4	  source limbs currently in flight
C    r5-r8	  left-shifted high parts, later OR-ed into finished limbs
C    r21-r24  right-shifted low parts; r24 carries over between groups
C    r0	  function result

ASM_START()
PROLOGUE(mpn_rshift)
	ldq	r4,0(r17)	C load first limb
	subq	r31,r19,r20	C r20 = 0 - cnt; relies on sll using only the low 6 bits of its count
	subq	r18,1,r18	C r18 = n-1 = number of limbs built from two source limbs
	and	r18,4-1,r28	C number of limbs in first loop
	sll	r4,r20,r0	C compute function result

	beq	r28,L(L0)	C n-1 already a multiple of 4, skip the lead-in loop
	subq	r18,r28,r18	C r18 = limbs left for the 4-way unrolled code

C Lead-in loop: one result limb per pass, r28 passes.  r4 always holds the
C current source limb and r3 the next one.  Both pointers are advanced
C before the store, hence the -8 displacement.
	ALIGN(8)
L(top0):
	ldq	r3,8(r17)
	addq	r16,8,r16
	srl	r4,r19,r5
	addq	r17,8,r17
	subq	r28,1,r28
	sll	r3,r20,r6
	bis	r3,r3,r4	C r4 = r3, next limb becomes current limb
	bis	r5,r6,r8
	stq	r8,-8(r16)
	bne	r28,L(top0)

C Here r4 is the current source limb, r17 points at it and r16 at the
C result slot it feeds.  r24 is its low part, still waiting for the high
C part from the following limb, or final if no limb follows.
L(L0):	srl	r4,r19,r24
	beq	r18,L(end)	C nothing left: r24 is the top result limb
C warm up phase 1
C Fetch the next group of four source limbs.
	ldq	r1,8(r17)
	subq	r18,4,r18
	ldq	r2,16(r17)
	ldq	r3,24(r17)
	ldq	r4,32(r17)
C warm up phase 2
C Start shifting the first group.  If it was the only group, finish in
C end1.  Otherwise fetch the second group while the first is completed.
	sll	r1,r20,r7
	srl	r1,r19,r21
	sll	r2,r20,r8
	beq	r18,L(end1)	C exactly four limbs remained
	ldq	r1,40(r17)
	srl	r2,r19,r22
	ldq	r2,48(r17)
	sll	r3,r20,r5
	bis	r7,r24,r7
	srl	r3,r19,r23
	bis	r8,r21,r8
	sll	r4,r20,r6
	ldq	r3,56(r17)
	srl	r4,r19,r24
	ldq	r4,64(r17)
	subq	r18,4,r18
	beq	r18,L(end2)	C exactly eight limbs remained
	ALIGN(16)
C main loop
C Software pipelined, four limbs per pass.  On entry to each pass r7 and r8
C are finished limbs, r5 and r6 still need r22 and r23 OR-ed in, r24 is the
C low part carried into the next group, and r1-r4 hold the following four
C source limbs.  Each pass stores four limbs and loads four more; the load
C displacements are relative to r17 before it is advanced at the bottom.
C NOTE(review): the unop slots presumably pad the schedule, and their
C trailing comments look like a disabled prefetch-style load -- confirm.
L(top):	stq	r7,0(r16)
	bis	r5,r22,r5
	stq	r8,8(r16)
	bis	r6,r23,r6

	sll	r1,r20,r7
	subq	r18,4,r18
	srl	r1,r19,r21
	unop	C ldq	r31,-96(r17)

	sll	r2,r20,r8
	ldq	r1,72(r17)
	srl	r2,r19,r22
	ldq	r2,80(r17)

	stq	r5,16(r16)
	bis	r7,r24,r7	C r24 is still the carry-over from the previous group
	stq	r6,24(r16)
	bis	r8,r21,r8

	sll	r3,r20,r5
	unop	C ldq	r31,-96(r17)
	srl	r3,r19,r23
	addq	r16,32,r16

	sll	r4,r20,r6
	ldq	r3,88(r17)
	srl	r4,r19,r24
	ldq	r4,96(r17)

	addq	r17,32,r17
	bne	r18,L(top)
C cool down phase 2/1
C Two groups are in flight: one partly combined in r5-r8 and one loaded in
C r1-r4.  Same work as a loop pass but without further loads.  Nine limbs
C are stored in total, at displacements 0 to 64.
L(end2):
	stq	r7,0(r16)
	bis	r5,r22,r5
	stq	r8,8(r16)
	bis	r6,r23,r6
	sll	r1,r20,r7
	srl	r1,r19,r21
	sll	r2,r20,r8
	srl	r2,r19,r22
	stq	r5,16(r16)
	bis	r7,r24,r7
	stq	r6,24(r16)
	bis	r8,r21,r8
	sll	r3,r20,r5
	srl	r3,r19,r23
	sll	r4,r20,r6
	srl	r4,r19,r24
C cool down phase 2/2
	stq	r7,32(r16)
	bis	r5,r22,r5
	stq	r8,40(r16)
	bis	r6,r23,r6
	stq	r5,48(r16)
	stq	r6,56(r16)
C cool down phase 2/3
	stq	r24,64(r16)	C top result limb, right shift only
	ret	r31,(r26),1

C cool down phase 1/1
C Only the first group was loaded.  Finish its shifts and store five limbs,
C at displacements 0 to 32.
L(end1):
	srl	r2,r19,r22
	sll	r3,r20,r5
	bis	r7,r24,r7
	srl	r3,r19,r23
	bis	r8,r21,r8
	sll	r4,r20,r6
	srl	r4,r19,r24
C cool down phase 1/2
	stq	r7,0(r16)
	bis	r5,r22,r5
	stq	r8,8(r16)
	bis	r6,r23,r6
	stq	r5,16(r16)
	stq	r6,24(r16)
	stq	r24,32(r16)	C top result limb, right shift only
	ret	r31,(r26),1

C No limbs left after the lead-in loop: store the pending top limb.
L(end):	stq	r24,0(r16)
	ret	r31,(r26),1
EPILOGUE(mpn_rshift)
ASM_END()