dnl  Alpha mpn_mod_34lsub1.

dnl  Copyright 2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     4 (?)
C EV5:     2.67
C EV6:     1.67


dnl  INPUT PARAMETERS
dnl  up		r16
dnl  n		r17

C Overview, as read from the code below:
C   The limbs at up are loaded as 64-bit quadwords and summed into three
C   accumulators selected by limb index modulo 3 (a0, a1, a2).  Each
C   accumulator has a companion counter (c0, c1, c2) that collects the
C   carries out of it.  The six values are finally folded into r0 using
C   shifts of 0, 16 and 32 bits together with the matching wraparound
C   pieces; these shifts correspond to 2^64 being congruent to 2^16 and
C   2^128 being congruent to 2^32 modulo 2^48-1.  The value left in r0 is a
C   plain sum of those pieces and is not reduced any further here.
C   NOTE(review): presumably callers only need a value congruent to the
C   input modulo 2^48-1; confirm against the generic implementation.
C
C Register roles:
C   l0 l1 l2	most recently loaded limbs
C   a0 a1 a2	running sums
C   c0 c1 c2	carry counts for a0 a1 a2
C   r0		scratch for the cmpult carry bit, later the result
C   r1-r4 r25	temporaries in the final fold
C   r17		kept at (limbs still to process) - 3 until $L_012

define(`l0',`r18')
define(`l1',`r19')
define(`l2',`r20')
define(`a0',`r21')
define(`a1',`r22')
define(`a2',`r23')
define(`c0',`r24')
define(`c1',`r5')
define(`c2',`r6')

ASM_START()
PROLOGUE(mpn_mod_34lsub1)
	bis	r31, r31, c0		C clear the three carry counters
	bis	r31, r31, c1
	bis	r31, r31, c2

	lda	r17, -3(r17)		C r17 = n - 3
	bge	r17, $L_3_or_more
	bis	r31, r31, a0		C fewer than 3 limbs: start from zero sums
	bis	r31, r31, a1
	bis	r31, r31, a2
	br	r31, $L_012

C Seed the accumulators directly with the first three limbs
$L_3_or_more:
	ldq	a0, 0(r16)
	ldq	a1, 8(r16)
	ldq	a2, 16(r16)
	lda	r16, 24(r16)		C advance up past the 3 limbs just read
	lda	r17, -3(r17)
	blt	r17, $L_012		C fewer than 3 limbs left

C Load the next three limbs and start the pipeline: a0 is updated here, and
C its carry is picked up at the top of $Loop or at $L_end
$L_6_or_more:
	ldq	l0, 0(r16)
	ldq	l1, 8(r16)
	ldq	l2, 16(r16)
	addq	l0, a0, a0

	lda	r16, 24(r16)
	lda	r17, -3(r17)
	blt	r17, $L_end		C no further full group of 3 to load

	ALIGN(16)
C Main loop
C Each pass finishes the previous group (carry of a0, then a1 and a2 with
C their carries) while loading the next group, and ends by adding the new l0
C into a0 so that its carry is checked on the following pass or at $L_end
$L_9_or_more:
$Loop:	cmpult	a0, l0, r0		C r0 = carry out of the pending a0 += l0
	ldq	l0, 0(r16)
	addq	r0, c0, c0
	addq	l1, a1, a1
	cmpult	a1, l1, r0		C r0 = carry out of a1 += l1
	ldq	l1, 8(r16)
	addq	r0, c1, c1
	addq	l2, a2, a2
	cmpult	a2, l2, r0		C r0 = carry out of a2 += l2
	ldq	l2, 16(r16)
	addq	r0, c2, c2
	addq	l0, a0, a0
	lda	r16, 24(r16)
	lda	r17, -3(r17)
	bge	r17, $Loop

C Drain the pipeline: record the carry of the pending a0 update and add the
C already loaded l1 and l2
$L_end:	cmpult	a0, l0, r0
	addq	r0, c0, c0
	addq	l1, a1, a1
	cmpult	a1, l1, r0
	addq	r0, c1, c1
	addq	l2, a2, a2
	cmpult	a2, l2, r0
	addq	r0, c2, c2

C Handle the last (n mod 3) limbs
$L_012:	lda	r17, 2(r17)		C r17 = (limbs left) - 1
	blt	r17, $L_0		C none left
	ldq	l0, 0(r16)
	addq	l0, a0, a0
	cmpult	a0, l0, r0
	addq	r0, c0, c0
	beq	r17, $L_0		C exactly one was left
	ldq	l1, 8(r16)
	addq	l1, a1, a1
	cmpult	a1, l1, r0
	addq	r0, c1, c1

C Align and sum our 3 main accumulators and 3 carry accumulators
C a1, a2 and c0 are reused as temporaries below once their original values
C have been consumed, so the instruction order matters
$L_0:	srl	a0, 48, r2		C bits of a0 above the low 48
	srl	a1, 32, r4		C bits of a1 above the low 32
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	insll	a1, 2, r1',		C (a1 & 0xffffffff) << 16
`	zapnot	a1, 15, r25
	sll	r25, 16, r1')
	zapnot	a0, 63, r0		C a0 & 0xffffffffffff
	srl	a2, 16, a1		C bits of a2 above the low 16, kept in a1
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	inswl	a2, 4, r3',		C (a2 & 0xffff) << 32
`	zapnot	a2, 3, r25
	sll	r25, 32, r3')
	addq	r1, r4, r1
	addq	r0, r2, r0
	srl	c0, 32, a2		C bits of c0 above the low 32, kept in a2
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	insll	c0, 2, r4',		C (c0 & 0xffffffff) << 16
`	zapnot	c0, 15, r25
	sll	r25, 16, r4')
	addq	r0, r1, r0
	addq	r3, a1, r3
	addq	r0, r3, r0
	srl	c1, 16, c0		C bits of c1 above the low 16, kept in c0
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	inswl	c1, 4, r2',		C (c1 & 0xffff) << 32
`	zapnot	c1, 3, r25
	sll	r25, 32, r2')
	addq	r4, a2, r4
C	srl	c2, 48, r3		C This will be 0 in practice
	zapnot	c2, 63, r1		C r1 = c2 & 0xffffffffffff
	addq	r0, r4, r0
	addq	r2, c0, r2
	addq	r0, r2, r0
C	addq	r1, r3, r1
	addq	r0, r1, r0		C r0 now holds the sum of all folded pieces

	ret	r31, (r26), 1
EPILOGUE(mpn_mod_34lsub1)
ASM_END()