dnl  Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
dnl  store sum in a third limb vector.

dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     4.75
C EV6:     3

dnl  INPUT PARAMETERS
dnl  res_ptr	r16
dnl  s1_ptr	r17
dnl  s2_ptr	r18
dnl  size	r19
dnl  carry-in	r20	(mpn_add_nc entry only; copied to r25 on entry)

C Register roles inside the routine, as used by the code below:
C   r0-r3    limbs loaded from s2_ptr
C   r4-r7    limbs loaded from s1_ptr
C   r28      raw sum of one s2 limb and one s1 limb (the main add)
C   r8       carry out of the main add (r28 < s1 limb, unsigned)
C   r25      running carry; also carry out of the carry add
C   r20-r23  finished result limbs waiting to be stored
C   r31      used as the zero source operand
C Result: the final carry is returned in r0.
C
C Each limb takes two adds (main add, then carry add).  The two carry-outs
C are OR-ed together with bis to form the carry into the next limb.

ASM_START()

C mpn_add_nc: same as mpn_add_n but starts from the carry-in passed in r20.
C It only seeds r25 and then joins the shared body at L(com).
PROLOGUE(mpn_add_nc)
	bis	r20,r31,r25		C cy = carry-in
	br	L(com)
EPILOGUE()

C mpn_add_n: res[i] = s1[i] + s2[i] for size limbs, starting with cy = 0.
PROLOGUE(mpn_add_n)
	bis	r31,r31,r25		C clear cy
L(com):	subq	r19,4,r19		C decr loop cnt
	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
C Start software pipeline for 1st loop
C Loads all four limb pairs of the first group and completes the 1st and
C 2nd adds, so the loop body can begin with the 3rd add.
	ldq	r0,0(r18)
	ldq	r4,0(r17)
	ldq	r1,8(r18)
	ldq	r5,8(r17)
	addq	r17,32,r17		C update s1_ptr
	addq	r0,r4,r28		C 1st main add
	ldq	r2,16(r18)
	addq	r25,r28,r20		C 1st carry add
	ldq	r3,24(r18)
	cmpult	r28,r4,r8		C compute cy from last add
	ldq	r6,-16(r17)
	cmpult	r20,r28,r25		C compute cy from last add
	ldq	r7,-8(r17)
	bis	r8,r25,r25		C combine cy from the two adds
	subq	r19,4,r19		C decr loop cnt
	addq	r1,r5,r28		C 2nd main add
	addq	r18,32,r18		C update s2_ptr
	addq	r28,r25,r21		C 2nd carry add
	cmpult	r28,r5,r8		C compute cy from last add
	blt	r19,$Lend1		C if less than 4 limbs remain, jump
C 1st loop handles groups of 4 limbs in a software pipeline
C Each pass finishes the 3rd and 4th adds of the current group, stores its
C four result limbs, and performs the loads plus 1st and 2nd adds of the
C next group.
	ALIGN(16)
$Loop:	cmpult	r21,r28,r25		C compute cy from last add
	ldq	r0,0(r18)
	bis	r8,r25,r25		C combine cy from the two adds
	ldq	r1,8(r18)
	addq	r2,r6,r28		C 3rd main add
	ldq	r4,0(r17)
	addq	r28,r25,r22		C 3rd carry add
	ldq	r5,8(r17)
	cmpult	r28,r6,r8		C compute cy from last add
	cmpult	r22,r28,r25		C compute cy from last add
	stq	r20,0(r16)
	bis	r8,r25,r25		C combine cy from the two adds
	stq	r21,8(r16)
	addq	r3,r7,r28		C 4th main add
	addq	r28,r25,r23		C 4th carry add
	cmpult	r28,r7,r8		C compute cy from last add
	cmpult	r23,r28,r25		C compute cy from last add
	addq	r17,32,r17		C update s1_ptr
	bis	r8,r25,r25		C combine cy from the two adds
	addq	r16,32,r16		C update res_ptr
	addq	r0,r4,r28		C 1st main add
	ldq	r2,16(r18)
	addq	r25,r28,r20		C 1st carry add
	ldq	r3,24(r18)
	cmpult	r28,r4,r8		C compute cy from last add
	ldq	r6,-16(r17)
	cmpult	r20,r28,r25		C compute cy from last add
	ldq	r7,-8(r17)
	bis	r8,r25,r25		C combine cy from the two adds
	subq	r19,4,r19		C decr loop cnt
	stq	r22,-16(r16)
	addq	r1,r5,r28		C 2nd main add
	stq	r23,-8(r16)
	addq	r25,r28,r21		C 2nd carry add
	addq	r18,32,r18		C update s2_ptr
	cmpult	r28,r5,r8		C compute cy from last add
	bge	r19,$Loop
C Finish software pipeline for 1st loop
C Completes the 3rd and 4th adds of the last group and stores all four of
C its result limbs; no further loads are issued here.
$Lend1:	cmpult	r21,r28,r25		C compute cy from last add
	bis	r8,r25,r25		C combine cy from the two adds
	addq	r2,r6,r28		C 3rd main add
	addq	r28,r25,r22		C 3rd carry add
	cmpult	r28,r6,r8		C compute cy from last add
	cmpult	r22,r28,r25		C compute cy from last add
	stq	r20,0(r16)
	bis	r8,r25,r25		C combine cy from the two adds
	stq	r21,8(r16)
	addq	r3,r7,r28		C 4th main add
	addq	r28,r25,r23		C 4th carry add
	cmpult	r28,r7,r8		C compute cy from last add
	cmpult	r23,r28,r25		C compute cy from last add
	bis	r8,r25,r25		C combine cy from the two adds
	addq	r16,32,r16		C update res_ptr
	stq	r22,-16(r16)
	stq	r23,-8(r16)
$Lend2:	addq	r19,4,r19		C restore loop cnt
	beq	r19,$Lret		C nothing left, return cy
C Start software pipeline for 2nd loop
	ldq	r0,0(r18)
	ldq	r4,0(r17)
	subq	r19,1,r19
	beq	r19,$Lend0		C exactly one limb left, skip the loop
C 2nd loop handles remaining 1-3 limbs
C One limb per pass; the loads for the following limb are issued while the
C current limb is being added.
	ALIGN(16)
$Loop0:	addq	r0,r4,r28		C main add
	ldq	r0,8(r18)
	cmpult	r28,r4,r8		C compute cy from last add
	ldq	r4,8(r17)
	addq	r28,r25,r20		C carry add
	addq	r18,8,r18
	addq	r17,8,r17
	stq	r20,0(r16)
	cmpult	r20,r28,r25		C compute cy from last add
	subq	r19,1,r19		C decr loop cnt
	bis	r8,r25,r25		C combine cy from the two adds
	addq	r16,8,r16
	bne	r19,$Loop0
C Final limb: operands are already in r0 and r4.
$Lend0:	addq	r0,r4,r28		C main add
	addq	r28,r25,r20		C carry add
	cmpult	r28,r4,r8		C compute cy from last add
	cmpult	r20,r28,r25		C compute cy from last add
	stq	r20,0(r16)
	bis	r8,r25,r25		C combine cy from the two adds

$Lret:	bis	r25,r31,r0		C return cy
	ret	r31,(r26),1
EPILOGUE()
ASM_END()