dnl  Intel Atom mpn_lshiftc -- mpn left shift with complement.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C mp_limb_t mpn_lshiftc (mp_ptr dst, mp_srcptr src, mp_size_t size,
C			 unsigned cnt);

C				cycles/limb
C P5
C P6 model 0-8,10-12
C P6 model 9 (Banias)
C P6 model 13 (Dothan)
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
C Intel Atom			 5.5
C AMD K6
C AMD K7
C AMD K8
C AMD K10

C Stack slots of the four arguments; the number is the offset handed to
C defframe.
defframe(PARAM_CNT, 16)
defframe(PARAM_SIZE,12)
defframe(PARAM_SRC, 8)
defframe(PARAM_DST, 4)

dnl  re-use parameter space
C Each argument slot is recycled as a spill slot once its argument has been
C loaded into a register; see the load/store pairs at function entry.
define(SAVE_UP,`PARAM_CNT')
define(VAR_COUNT,`PARAM_SIZE')
define(SAVE_EBX,`PARAM_SRC')
define(SAVE_EBP,`PARAM_DST')

C Symbolic names for the pointer and shift-count registers.
define(`rp', `%edi')
define(`up', `%esi')
define(`cnt', `%ecx')

ASM_START()
	TEXT

C mpn_lshiftc walks the source from its highest limb down to its lowest, two
C limbs per loop pass, and stores the complement of each left-shifted limb
C through rp.
C
C Register use:
C   up (%esi)	source pointer, starts at the highest limb and moves down
C   rp (%edi)	destination pointer, starts at the highest limb and moves down
C   %cl		shift count; neg flips it between cnt and -cnt, so every shl
C		runs with cnt and every shr with -cnt (x86 masks a 32-bit
C		shift count to 5 bits, so -cnt behaves as 32-cnt)
C   %eax	first the return value (highest limb shifted right by -cnt),
C		parked on the stack while the loop runs; scratch in the loop
C   %ebx, %ebp	source limbs waiting to be shifted left
C   %edx	size, then size/2; scratch in the loop
C   VAR_COUNT	loop counter, kept in memory
C
C NOTE(review): (up) is read unconditionally, so the code presumably expects
C size >= 1, and the neg trick presumably expects 1 <= cnt <= 31 -- confirm
C against the documented mpn_lshiftc contract.

PROLOGUE(mpn_lshiftc)
deflit(`FRAME',0)
	mov	PARAM_CNT, cnt
	mov	PARAM_SIZE, %edx
	mov	up, SAVE_UP		C spill %esi into the consumed cnt slot
	mov	PARAM_SRC, up
	push	rp			FRAME_pushl()
	mov	PARAM_DST, rp

C Point up and rp at the highest limb, index size-1.
	lea	-4(up,%edx,4), up
	mov	%ebx, SAVE_EBX		C spill %ebx into the consumed src slot
	lea	-4(rp,%edx,4), rp

C Halve the size; the carry flag receives the low bit of size.  The two mov
C instructions leave the flags alone, so jnc still sees that carry: even
C sizes branch to L(evn), odd sizes fall through.
	shr	%edx
	mov	(up), %eax		C highest source limb
	mov	%edx, VAR_COUNT
	jnc	L(evn)

C Odd size: deal with the highest limb on its own.
	mov	%eax, %ebx
	shl	%cl, %ebx		C highest limb << cnt
	neg	cnt			C %cl = -cnt
	shr	%cl, %eax		C return value: bits shifted out at the top
	test	%edx, %edx
	jnz	L(gt1)
C size == 1: store the complemented limb and leave.
	not	%ebx
	mov	%ebx, (rp)
	jmp	L(quit)

C Odd size above 1: park the return value on the stack, fetch the next limb
C down and enter the loop at its second half.
C NOTE(review): this push carries no FRAME_pushl(), presumably because FRAME
C is tracked in textual order and L(evn) just below is entered with only rp
C pushed -- confirm against the FRAME macro definitions.
L(gt1):	mov	%ebp, SAVE_EBP
	push	%eax
	mov	-4(up), %eax
	mov	%eax, %ebp
	shr	%cl, %eax
	jmp	L(lo1)

C Even size: load the two highest limbs.  %eax becomes the return value,
C %ebp keeps the highest limb, %ebx keeps the next limb, and %edx holds that
C next limb shifted right by -cnt.
L(evn):	mov	%ebp, SAVE_EBP
	neg	cnt			C %cl = -cnt
	mov	%eax, %ebp
	mov	-4(up), %edx
	shr	%cl, %eax
	mov	%edx, %ebx
	shr	%cl, %edx
	neg	cnt			C %cl = cnt again
	decl	VAR_COUNT
	lea	4(rp), rp		C lea leaves the flags from decl intact
	lea	-4(up), up
	jz	L(end)			C size == 2: nothing left for the loop
C Park the return value; %eax is scratch inside the loop.
	push	%eax			FRAME_pushl()

C Main loop, two limbs per pass.  %cl = cnt on entry at L(top) and -cnt on
C entry at L(lo1).  Every stored limb is (upper limb << cnt) OR
C (lower limb >> -cnt), complemented with not just before the store.
L(top):	shl	%cl, %ebp
	or	%ebp, %edx
	shl	%cl, %ebx
	neg	cnt
	not	%edx
	mov	-4(up), %eax
	mov	%eax, %ebp
	mov	%edx, -4(rp)
	shr	%cl, %eax
	lea	-8(rp), rp
L(lo1):	mov	-8(up), %edx
	or	%ebx, %eax
	mov	%edx, %ebx
	shr	%cl, %edx
	not	%eax
	lea	-8(up), up
	neg	cnt
	mov	%eax, (rp)
	decl	VAR_COUNT
	jg	L(top)

C Loop done: recover the return value.
	pop	%eax			FRAME_popl()
C Tail: emit the last two limbs.  The limb in %ebx has nothing ORed in from
C below, so its low cnt bits are zero after the shift and become ones after
C the not.
L(end):
	shl	%cl, %ebp
	shl	%cl, %ebx
	or	%ebp, %edx
	mov	SAVE_EBP, %ebp
	not	%edx
	not	%ebx
	mov	%edx, -4(rp)
	mov	%ebx, -8(rp)

C Common exit: restore the spilled registers; %eax still holds the return
C value computed at entry.
L(quit):
	mov	SAVE_UP, up
	mov	SAVE_EBX, %ebx
	pop	rp			FRAME_popl()
	ret
EPILOGUE()
ASM_END()