/* Pentium optimized __mpn_lshift --
   Copyright (C) 1992, 94, 95, 96, 97, 98, 2000 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* mp_limb_t __mpn_lshift (mp_ptr res_ptr, mp_srcptr s_ptr,
			   mp_size_t size, unsigned int cnt)

   Shift SIZE limbs at S_PTR left by CNT bits and store the result at
   RES_PTR; the return value is the bits shifted out of the most
   significant limb.  Two code paths:
     - a generic SHLD-based path that walks from the most significant
       limb downward (safe when res_ptr >= s_ptr overlap-wise), and
     - a faster ADD/ADC "double each limb" path for cnt == 1, which
       walks upward and is therefore only used when the overlap checks
       below allow it.
   NOTE(review): callers are presumed to pass 0 < cnt < 32 and
   size >= 1, as per the GMP mpn_lshift contract -- not checked here.  */

#if 0 /* vstr */
#include "sysdep.h"
#include "asm-syntax.h"
#include "bp-sym.h"
#include "bp-asm.h"
#endif /* vstr */

/* Stack offsets of the arguments relative to %esp after the four
   register pushes in the prologue.  LINKAGE/PTR_SIZE are supplied by
   the (conditionally included) bp-asm.h machinery so the same source
   assembles with and without bounded pointers.  */
#define PARMS	LINKAGE+16		/* space for 4 saved regs */
#define RES	PARMS
#define S	RES+PTR_SIZE
#define SIZE	S+PTR_SIZE
#define CNT	SIZE+4

	.text
ENTRY (BP_SYM (__mpn_lshift))
	ENTER

	/* Save the callee-saved registers we use.  */
	pushl	%edi
	pushl	%esi
	pushl	%ebp
	pushl	%ebx

	movl	RES(%esp),%edi		/* %edi = res_ptr */
	movl	S(%esp),%esi		/* %esi = s_ptr */
	movl	SIZE(%esp),%ebx		/* %ebx = size, in limbs */
	movl	CNT(%esp),%ecx		/* %ecx = cnt, shift count in bits */
#if __BOUNDED_POINTERS__
	shll	$2, %ebx		/* convert limbs to bytes */
	CHECK_BOUNDS_BOTH_WIDE (%edi, RES(%esp), %ebx)
	CHECK_BOUNDS_BOTH_WIDE (%esi, S(%esp), %ebx)
	shrl	$2, %ebx		/* back to limbs */
#endif

/* We can use faster code for shift-by-1 under certain conditions.  */
	cmp	$1,%ecx
	jne	L(normal)
	/* cnt == 1.  The special path loops low-to-high, so it is only
	   used when the destination cannot overtake the source: either
	   res_ptr <= s_ptr + 1 limb, or res_ptr starts at/after the end
	   of the source.  Otherwise fall through to the generic path.  */
	leal	4(%esi),%eax
	cmpl	%edi,%eax
	jnc	L(special)		/* jump if s_ptr + 1 >= res_ptr */
	leal	(%esi,%ebx,4),%eax
	cmpl	%eax,%edi
	jnc	L(special)		/* jump if res_ptr >= s_ptr + size */

L(normal):
	/* Generic path: process limbs from most significant downward,
	   which tolerates res_ptr >= s_ptr overlap.  */
	leal	-4(%edi,%ebx,4),%edi	/* %edi = &res_ptr[size-1] */
	leal	-4(%esi,%ebx,4),%esi	/* %esi = &s_ptr[size-1] */

	movl	(%esi),%edx		/* most significant source limb */
	subl	$4,%esi
	xorl	%eax,%eax
	shldl	%cl,%edx,%eax		/* compute carry limb */
	pushl	%eax			/* push carry limb onto stack */

	decl	%ebx			/* size-1 limbs still to produce */
	pushl	%ebx			/* saved for the cleanup loop */
	shrl	$3,%ebx			/* number of 8-limb chunks */
	jz	L(end)			/* fewer than 8 limbs: skip main loop */

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
	/* Main loop, unrolled 8 limbs per iteration.  %edx always holds
	   the limb carried in from the previous shldl; %eax/%ebp/%edx
	   rotate roles so each shldl's low-order operand is preloaded.
	   Instruction order is tuned for Pentium U/V pipe pairing --
	   do not reorder.  */
L(oop):	movl	-28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp		/* limb carried over from last round */

	movl	(%esi),%eax
	movl	-4(%esi),%edx
	shldl	%cl,%eax,%ebp
	shldl	%cl,%edx,%eax
	movl	%ebp,(%edi)
	movl	%eax,-4(%edi)

	movl	-8(%esi),%ebp
	movl	-12(%esi),%eax
	shldl	%cl,%ebp,%edx
	shldl	%cl,%eax,%ebp
	movl	%edx,-8(%edi)
	movl	%ebp,-12(%edi)

	movl	-16(%esi),%edx
	movl	-20(%esi),%ebp
	shldl	%cl,%edx,%eax
	shldl	%cl,%ebp,%edx
	movl	%eax,-16(%edi)
	movl	%edx,-20(%edi)

	movl	-24(%esi),%eax
	movl	-28(%esi),%edx		/* %edx carries into next iteration */
	shldl	%cl,%eax,%ebp
	shldl	%cl,%edx,%eax
	movl	%ebp,-24(%edi)
	movl	%eax,-28(%edi)

	subl	$32,%esi
	subl	$32,%edi
	decl	%ebx
	jnz	L(oop)

L(end):	popl	%ebx			/* recover size-1 */
	andl	$7,%ebx			/* limbs the unrolled loop didn't cover */
	jz	L(end2)
	/* Cleanup loop: one limb at a time, same rotation of %edx.  */
L(oop2):
	movl	(%esi),%eax
	shldl	%cl,%eax,%edx		/* %edx = finished result limb */
	movl	%edx,(%edi)
	movl	%eax,%edx		/* carry this limb into next round */
	subl	$4,%esi
	subl	$4,%edi
	decl	%ebx
	jnz	L(oop2)

L(end2):
	shll	%cl,%edx		/* compute least significant limb */
	movl	%edx,(%edi)		/* store it */

	popl	%eax			/* pop carry limb = return value */

	popl	%ebx
	popl	%ebp
	popl	%esi
	popl	%edi

	LEAVE
	ret

/* cnt == 1 fast path: shifting left by one bit is doubling, so each
   limb is added to itself with ADC propagating the bit between limbs.
   We loop from the least significant end of the arrays, which is only
   permissible when the overlap checks above hold, since the function
   is documented to work for overlapping source and destination.  */

L(special):
	movl	(%esi),%edx		/* least significant source limb */
	addl	$4,%esi

	decl	%ebx			/* size-1 limbs still to produce */
	pushl	%ebx			/* saved for the cleanup loop */
	shrl	$3,%ebx			/* number of 8-limb chunks */

	addl	%edx,%edx		/* double first limb; CF = bit out */
	/* incl/decl leave CF untouched, so this tests %ebx for zero
	   without destroying the carry just computed.  */
	incl	%ebx
	decl	%ebx
	jz	L(Lend)

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
	/* Main loop, 8 limbs per iteration.  Every instruction below is
	   chosen to preserve CF between the adcl's (mov/lea/dec do not
	   clobber it).  Stores are interleaved for Pentium pairing.  */
L(Loop):
	movl	28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp		/* result limb from previous round */

	movl	(%esi),%eax
	movl	4(%esi),%edx
	adcl	%eax,%eax		/* eax = 2*eax + CF */
	movl	%ebp,(%edi)
	adcl	%edx,%edx
	movl	%eax,4(%edi)

	movl	8(%esi),%ebp
	movl	12(%esi),%eax
	adcl	%ebp,%ebp
	movl	%edx,8(%edi)
	adcl	%eax,%eax
	movl	%ebp,12(%edi)

	movl	16(%esi),%edx
	movl	20(%esi),%ebp
	adcl	%edx,%edx
	movl	%eax,16(%edi)
	adcl	%ebp,%ebp
	movl	%edx,20(%edi)

	movl	24(%esi),%eax
	movl	28(%esi),%edx
	adcl	%eax,%eax
	movl	%ebp,24(%edi)
	adcl	%edx,%edx		/* %edx stored at top of next round */
	movl	%eax,28(%edi)

	leal	32(%esi),%esi		/* use leal not to clobber carry */
	leal	32(%edi),%edi
	decl	%ebx			/* decl also preserves CF */
	jnz	L(Loop)

L(Lend):
	popl	%ebx			/* recover size-1 (pop keeps flags) */
	sbbl	%eax,%eax		/* save carry in %eax (0 or -1) */
	andl	$7,%ebx			/* clobbers CF -- hence the save */
	jz	L(Lend2)
	addl	%eax,%eax		/* restore carry from eax */
	/* Cleanup loop: double one limb at a time, still chaining CF.  */
L(Loop2):
	movl	%edx,%ebp		/* result limb from previous round */
	movl	(%esi),%edx
	adcl	%edx,%edx
	movl	%ebp,(%edi)

	leal	4(%esi),%esi		/* use leal not to clobber carry */
	leal	4(%edi),%edi
	decl	%ebx
	jnz	L(Loop2)

	jmp	L(L1)			/* CF from the loop is still live;
					   skip the CF restore below */
L(Lend2):
	addl	%eax,%eax		/* restore carry from eax */
L(L1):	movl	%edx,(%edi)		/* store last limb */

	sbbl	%eax,%eax		/* %eax = -CF */
	negl	%eax			/* return bit shifted out (0 or 1) */

	popl	%ebx
	popl	%ebp
	popl	%esi
	popl	%edi

	LEAVE
	ret
END (BP_SYM (__mpn_lshift))