dnl  Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.

dnl  Copyright 1996, 1998-2000, 2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')


C P5: 14.2 cycles/crossproduct (approx)


C void mpn_mul_basecase (mp_ptr wp,
C                        mp_srcptr xp, mp_size_t xsize,
C                        mp_srcptr yp, mp_size_t ysize);
C
C Schoolbook product of the xsize-limb operand at xp and the ysize-limb
C operand at yp.  Limbs wp[0] .. wp[xsize+ysize-1] are written.
C
C The first row (x times yp[0]) is stored straight into wp; every further
C y limb contributes one row that is multiplied and added into wp, shifted
C up by one limb each time.
C
C The xsize == 1 shortcut stores only wp[0] and wp[1] and never looks at
C ysize, so it relies on ysize being 1 whenever xsize is 1 (presumably the
C caller guarantees xsize >= ysize >= 1 -- confirm against the callers).
C
C Register roles once past the 1x1 shortcut:
C   esi  xp + xsize limbs (one past the end of x), fixed
C   edi  wp position one past the limbs the current row indexes
C   ebp  address of the y limb of the current row
C   ecx  negative limb index, counted up to zero by each inner loop
C   ebx  carry limb handed from one crossproduct to the next
C   eax, edx  low and high halves of each mull result
C   VAR_COUNTER  rows still to be processed after the current one

defframe(PARAM_WP,     4)
defframe(PARAM_XP,     8)
defframe(PARAM_XSIZE, 12)
defframe(PARAM_YP,    16)
defframe(PARAM_YSIZE, 20)

defframe(VAR_COUNTER, -4)

	TEXT
	ALIGN(8)
PROLOGUE(mpn_mul_basecase)

	pushl	%eax			C value irrelevant, push only reserves a stack slot
	pushl	%esi
	pushl	%ebp
	pushl	%edi
deflit(`FRAME',16)

	movl	PARAM_XP,%esi
	movl	PARAM_YP,%ebp
	movl	PARAM_WP,%edi

	movl	(%esi),%eax		C eax = xp[0]
	mull	(%ebp)			C edx:eax = xp[0] * yp[0]
	movl	%eax,(%edi)		C wp[0] = low limb
	movl	PARAM_XSIZE,%ecx
	decl	%ecx			C ecx = xsize-1, zero means a 1x1 product
	jz	L(single)

	movl	PARAM_XSIZE,%eax
	pushl	%ebx			C ebx is only needed on the multi-limb path
FRAME_pushl()
	movl	%edx,%ebx		C high limb of xp[0]*yp[0] is the first carry limb
	leal	(%edi,%eax,4),%edi	C edi = wp + xsize limbs
	leal	(%esi,%eax,4),%esi	C esi = xp + xsize limbs
	negl	%ecx			C ecx = -(xsize-1), loop runs until it reaches 0
	xorl	%eax,%eax		C CF = 0 for the first adcl in the loop

C First row: wp[j] = low (xp[j] * yp[0]) + carry, for j = 1 .. xsize-1.
C CF from the addl is live across the loop branch: incl, movl and jnz leave
C it alone and the adcl at the loop top consumes it.  Instruction order is
C kept exactly as in the original hand-scheduled sequence.
	ALIGN(8)
L(row0):
	adcl	$0,%ebx			C fold the pending carry bit into the carry limb
	movl	(%esi,%ecx,4),%eax	C eax = next x limb
	mull	(%ebp)			C edx:eax = x limb * yp[0]
	addl	%ebx,%eax		C add carry limb, CF picked up next time round
	movl	%eax,(%edi,%ecx,4)	C store low limb of this column
	incl	%ecx			C sets ZF for jnz, CF untouched
	movl	%edx,%ebx		C high limb becomes the next carry limb
	jnz	L(row0)

	adcl	$0,%ebx			C carry bit left over from the last column
	movl	PARAM_YSIZE,%eax
	movl	%ebx,(%edi)		C wp[xsize] = top limb of the first row
	addl	$4,%edi			C next row lands one limb higher in wp
	decl	%eax			C eax = ysize-1 rows still to do
	jz	L(finish)
	movl	%eax,VAR_COUNTER

L(next_row):
	addl	$4,%ebp			C advance to the next y limb
	movl	PARAM_XSIZE,%ecx
	negl	%ecx			C ecx = -xsize, this row covers every x limb
	xorl	%ebx,%ebx		C carry limb = 0 and CF = 0

C Multiply-accumulate row: wp column += x limb * y limb + carry.
C Note kept from the original source: code at 0x61 here, close enough to
C aligned (so no ALIGN directive is placed in front of this loop).
L(accum):
	adcl	$0,%ebx			C carry bit out of the previous column addition
	movl	(%esi,%ecx,4),%eax	C eax = next x limb
	mull	(%ebp)			C edx:eax = x limb * current y limb
	addl	%ebx,%eax		C low limb + carry limb
	movl	(%edi,%ecx,4),%ebx	C ebx = limb already in wp
	adcl	$0,%edx			C carry of that addition goes into the high limb
	addl	%eax,%ebx		C accumulate, CF consumed at the loop top
	movl	%ebx,(%edi,%ecx,4)	C write the updated wp limb back
	incl	%ecx			C sets ZF for jnz, CF untouched
	movl	%edx,%ebx		C high limb becomes the next carry limb
	jnz	L(accum)

	adcl	$0,%ebx			C carry bit left over from the last column

	movl	%ebx,(%edi)		C top limb of this row goes into a fresh wp limb
	addl	$4,%edi			C shift the wp window up one limb
	movl	VAR_COUNTER,%eax
	decl	%eax			C one row fewer to go
	movl	%eax,VAR_COUNTER	C movl keeps the ZF that decl produced
	jnz	L(next_row)

C Exit for the multi-limb path: five slots are on the stack here.
L(finish):
	popl	%ebx
	popl	%edi
	popl	%ebp
	popl	%esi
	addl	$4,%esp			C release the slot reserved by the first push
	ret

C Exit for the 1x1 product: ebx was never pushed on this path.
L(single):
	movl	%edx,4(%edi)		C wp[1] = high limb of xp[0]*yp[0]
	popl	%edi
	popl	%ebp
	popl	%esi
	popl	%eax			C discard the slot reserved by the first push
	ret

EPILOGUE()