dnl  mpn_sumdiff

dnl  Copyright 2011 The Code Cavern

dnl  This file is part of the MPIR Library.

dnl  The MPIR Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
dnl  your option) any later version.

dnl  The MPIR Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write
dnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl  Boston, MA 02110-1301, USA.

include(`../config.m4')

dnl  ---------------------------------------------------------------------
dnl  mpn_sumdiff_n
dnl
dnl  Walks two equal-length vectors of 64-bit limbs once, from the lowest
dnl  address upwards, and writes both their sum and their difference.
dnl
dnl  Registers read on entry (this is the System V AMD64 argument order;
dnl  NOTE(review): confirm the ABI against the build target):
dnl    rdi   where the sum limbs x + y are stored
dnl    rsi   where the difference limbs x - y are stored
dnl    rdx   source x
dnl    rcx   source y
dnl    r8    n, the limb count (reused as scratch once the loop starts)
dnl
dnl  Result:   rax = 2 * (carry out of the sum) + (borrow out of the difference)
dnl  Saved:    rbx, rbp, r12-r15 are pushed on entry and popped before ret.
dnl  Scratch:  rax, r8-r11, flags; the four pointer registers are modified.
dnl
dnl  Two carry chains are interleaved, so CF cannot hold both at once:
dnl    - the addition carry is parked in AH by lahf and reloaded by sahf;
dnl    - the subtraction borrow is parked in r9b as 0 or 1 by setc and is
dnl      reloaded by "add $255,%r9b", which carries exactly when r9b is
dnl      nonzero (r9b itself is garbage until the next setc).
dnl  Only mov, lea, push and pop separate a save from its reload where CF
dnl  must survive, because those leave the flags alone.
dnl
dnl  Indexing: every pointer is moved forward to limb n-3 and r10 starts at
dnl  3-n, so (ptr,r10,8) addresses limb 0 and r10 counts up towards zero.
dnl  The main loop handles four limbs per pass; on exit r10 is 0..3 and
dnl  3-r10 limbs remain at byte offsets 0, 8, 16 from the moved pointers.
dnl
dnl  lahf and sahf are written as raw bytes, presumably for assemblers that
dnl  do not accept the mnemonics in 64-bit mode.  TODO confirm.
dnl  ---------------------------------------------------------------------

ASM_START()
PROLOGUE(mpn_sumdiff_n)
	push	%rbx
	xor	%r9,%r9			# borrow state = 0
	push	%rbp
	xor	%rax,%rax		# AH = 0, so the first sahf yields CF = 0
	push	%r12
	mov	$3,%r10d
	push	%r13
	lea	-24(%rdi,%r8,8),%rdi	# all four pointers now address limb n-3
	push	%r14
	lea	-24(%rsi,%r8,8),%rsi
	push	%r15
	sub	%r8,%r10		# r10 = 3 - n, CF set when n > 3
	lea	-24(%rdx,%r8,8),%rdx
	lea	-24(%rcx,%r8,8),%rcx
	jnc	skiplp			# n <= 3: nothing for the 4-way loop to do
.align 16
lp:	.byte	0x9e			# sahf
	mov	(%rdx,%r10,8),%r8	# r8,rbx,rbp,r12 become the sum limbs
	mov	%r8,%r11		# r11,r13,r14,r15 keep x for the difference
	adc	(%rcx,%r10,8),%r8
	mov	8(%rdx,%r10,8),%rbx
	mov	%rbx,%r13
	adc	8(%rcx,%r10,8),%rbx
	mov	16(%rdx,%r10,8),%rbp
	mov	24(%rdx,%r10,8),%r12
	mov	%rbp,%r14
	mov	%r12,%r15
	adc	16(%rcx,%r10,8),%rbp
	adc	24(%rcx,%r10,8),%r12
	.byte	0x9f			# lahf
	add	$255,%r9b		# CF = borrow from the previous pass
	sbb	(%rcx,%r10,8),%r11
	mov	%r8,(%rdi,%r10,8)	# each sum limb is stored only after the
	sbb	8(%rcx,%r10,8),%r13	# sbb that reads the same y limb
	sbb	16(%rcx,%r10,8),%r14
	mov	%rbx,8(%rdi,%r10,8)
	sbb	24(%rcx,%r10,8),%r15
	mov	%rbp,16(%rdi,%r10,8)
	mov	%r12,24(%rdi,%r10,8)
	mov	%r13,8(%rsi,%r10,8)
	setc	%r9b			# park the borrow before add rewrites CF
	add	$4,%r10			# carries out once r10 reaches 0..3
	mov	%r14,16-32(%rsi,%r10,8)	# -32 undoes the index step just taken
	mov	%r15,24-32(%rsi,%r10,8)
	mov	%r11,-32(%rsi,%r10,8)
	jnc	lp
skiplp:
	# Tail dispatch on r10 in 0..3.  cmp clobbers CF, which is fine because
	# both carries are parked in AH and r9b at this point.
	cmp	$2,%r10
	jg	case0			# r10 = 3: no limbs left
	je	case1			# r10 = 2: one limb left
	jp	case2			# r10 = 1: low result byte 0xff, even parity
					# r10 = 0: low result byte 0xfe, odd parity
case3:	.byte	0x9e			# sahf
	mov	(%rdx),%r8
	mov	%r8,%r11
	adc	(%rcx),%r8
	mov	8(%rdx),%rbx
	mov	%rbx,%r13
	adc	8(%rcx),%rbx
	mov	16(%rdx),%rbp
	mov	%rbp,%r14
	adc	16(%rcx),%rbp
	.byte	0x9f			# lahf
	add	$255,%r9b		# CF = borrow
	sbb	(%rcx),%r11
	mov	%r8,(%rdi)
	sbb	8(%rcx),%r13
	sbb	16(%rcx),%r14
	mov	%rbx,8(%rdi)
	mov	%rbp,16(%rdi)
	mov	%r13,8(%rsi)
	setc	%r9b
	mov	%r14,16(%rsi)
	mov	%r11,(%rsi)
	.byte	0x9e			# sahf
	mov	$0,%rax			# mov, not xor: CF must survive
	adc	$0,%rax			# rax = carry
	add	$255,%r9b		# CF = borrow
	rcl	$1,%rax			# rax = 2*carry + borrow
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret
case2:	.byte	0x9e			# sahf
	mov	8(%rdx),%r8
	mov	%r8,%r11
	adc	8(%rcx),%r8
	mov	8+8(%rdx),%rbx
	mov	%rbx,%r13
	adc	8+8(%rcx),%rbx
	.byte	0x9f			# lahf
	add	$255,%r9b		# CF = borrow
	sbb	8(%rcx),%r11
	mov	%r8,8(%rdi)
	sbb	8+8(%rcx),%r13
	mov	%rbx,8+8(%rdi)
	mov	%r13,8+8(%rsi)
	setc	%r9b
	mov	%r11,8(%rsi)
	.byte	0x9e			# sahf
	mov	$0,%rax			# mov, not xor: CF must survive
	adc	$0,%rax			# rax = carry
	add	$255,%r9b		# CF = borrow
	rcl	$1,%rax			# rax = 2*carry + borrow
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret
case1:	.byte	0x9e			# sahf
	mov	16(%rdx),%r8
	mov	%r8,%r11
	adc	16(%rcx),%r8
	.byte	0x9f			# lahf
	add	$255,%r9b		# CF = borrow
	sbb	16(%rcx),%r11
	mov	%r8,16(%rdi)
	setc	%r9b
	mov	%r11,16(%rsi)
	# falls through into the shared return sequence
case0:	.byte	0x9e			# sahf
	mov	$0,%rax			# mov, not xor: CF must survive
	adc	$0,%rax			# rax = carry
	add	$255,%r9b		# CF = borrow
	rcl	$1,%rax			# rax = 2*carry + borrow
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret
EPILOGUE()