dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.

dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C           cycles/limb
C Itanium:      2
C Itanium 2:    1

C TODO
C  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
C    wind-down code).

C OVERVIEW
C  The routine reads limbs through up and vp, combines each pair with logop,
C  passes the result through notormov, and writes it through rp.  All three
C  pointers advance by 8 bytes per limb.  The first limb pair is loaded
C  unconditionally, so n is presumably required to be at least 1 -- confirm
C  against the callers.
C
C  logop and notormov are chosen by the OPERATION_xxx symbol below:
C    logop     combines two source limbs (and, andcm, or, xor)
C    notormov  either copies the logop result (mov) or complements it by
C              subtracting it from -1 (sub)
C
C REGISTER USE
C  r2        saved copy of ar.lc, restored before returning
C  r10, r11  first limb loaded from up and from vp
C  r16-r19   limbs loaded from up
C  r20-r23   limbs loaded from vp
C  r14, r15  alternating logop results
C  r8, r9    alternating notormov results, i.e. the values that get stored
C  p15       set when n > 4
C  p6,p7,p8  set when n mod 4 is 1, 2, 3 respectively

C INPUT PARAMETERS
define(`rp', `r32')
define(`up', `r33')
define(`vp', `r34')
define(`n',  `r35')

C NOTE(review): the operation is selected unconditionally here, so
C OPERATION_ior_n is always defined in addition to any OPERATION_xxx that the
C build may supply.  Each matching ifdef below redefines func, logop and
C notormov, so the last matching one wins.  Confirm that this copy of the
C file is only meant to provide mpn_ior_n.
define(`OPERATION_ior_n',1)

ifdef(`OPERATION_and_n',
`	define(`func',`mpn_and_n')
	define(`logop',		`and	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_andn_n',
`	define(`func',`mpn_andn_n')
	define(`logop',		`andcm	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_nand_n',
`	define(`func',`mpn_nand_n')
	define(`logop',		`and	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_ior_n',
`	define(`func',`mpn_ior_n')
	define(`logop',		`or	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_iorn_n',
`	define(`func',`mpn_iorn_n')
	define(`logop',		`andcm	$1 = $3, $2')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_nior_n',
`	define(`func',`mpn_nior_n')
	define(`logop',		`or	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_xor_n',
`	define(`func',`mpn_xor_n')
	define(`logop',		`xor	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_xnor_n',
`	define(`func',`mpn_xnor_n')
	define(`logop',		`xor	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')

ASM_START()
PROLOGUE(func)
	.prologue
	.save	ar.lc, r2
	.body
C With the 32-bit ABI the three pointers go through addp4 and n through zxt4
C before any of them is used.
ifdef(`HAVE_ABI_32',
`	addp4		rp = 0, rp		C			M I
	addp4		up = 0, up		C			M I
	addp4		vp = 0, vp		C			M I
	zxt4		n = n			C			I
	;;
')
C Load the first limb pair and save ar.lc in r2.
{.mmi
	ld8		r10 = [up], 8		C			M
	ld8		r11 = [vp], 8		C			M
	mov.i		r2 = ar.lc		C			I0
}
C r14 = n mod 4, p15 = (n > 4), n = n / 4.
{.mmi
	and		r14 = 3, n		C			M I
	cmp.lt		p15, p14 = 4, n		C			M I
	shr.u		n = n, 2		C			I0
	;;
}
C Dispatch on n mod 4.  A remainder of 0 falls through to .Lb00.
{.mmi
	cmp.eq		p6, p0 = 1, r14		C			M I
	cmp.eq		p7, p0 = 2, r14		C			M I
	cmp.eq		p8, p0 = 3, r14		C			M I
}
{.bbb
   (p6)	br.dptk		.Lb01			C			B
   (p7)	br.dptk		.Lb10			C			B
   (p8)	br.dptk		.Lb11			C			B
}

C n mod 4 = 0.  Without p15 the remaining three limb pairs are loaded here
C and the wind-down code is entered at .Lcj4.  With p15 the loop is entered
C at .LL00 with ar.lc = n/4 - 2.
.Lb00:	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	add		n = -2, n		C			M I
	;;
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	;;
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
  (p15)	br.cond.dpnt	.grt4			C			B

	logop(		r14, r10, r11)		C			M I
	;;
	logop(		r15, r17, r21)		C			M I
	notormov(	r8, r14)		C			M I
	br		.Lcj4			C			B

.grt4:	logop(		r14, r10, r11)		C			M I
	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	;;
	logop(		r15, r17, r21)		C			M I
	ld8		r17 = [up], 8		C			M
	mov.i		ar.lc = n		C			I0
	notormov(	r8, r14)		C			M I
	ld8		r21 = [vp], 8		C			M
	br		.LL00			C			B

C n mod 4 = 1.  Without p15 only the first limb pair exists, so the result
C goes straight to the final store at .Lcj1.  With p15, four more limb pairs
C are loaded and br.cloop decides between the loop (.LL01, via .grt5) and
C the wind-down code (.Lcj5).
.Lb01:	add		n = -1, n		C			M I
	logop(		r15, r10, r11)		C			M I
  (p15)	br.cond.dpnt	.grt1			C			B
	;;

	notormov(	r9, r15)		C			M I
	br		.Lcj1			C			B

.grt1:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	;;
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	mov.i		ar.lc = n		C			I0
	;;
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	;;
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.grt5			C			B
	;;

	logop(		r14, r16, r20)		C			M I
	notormov(	r9, r15)		C			M I
	br		.Lcj5			C			B

.grt5:	logop(		r14, r16, r20)		C			M I
	ld8		r16 = [up], 8		C			M
	notormov(	r9, r15)		C			M I
	ld8		r20 = [vp], 8		C			M
	br		.LL01			C			B

C n mod 4 = 2.  Without p15 the second limb pair is loaded and the wind-down
C code is entered at .Lcj2.  With p15, br.cloop decides between the loop top
C (.Loop) and the wind-down code (.Lcj6).
.Lb10:	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
  (p15)	br.cond.dpnt	.grt2			C			B

	logop(		r14, r10, r11)		C			M I
	;;
	logop(		r15, r19, r23)		C			M I
	notormov(	r8, r14)		C			M I
	br		.Lcj2			C			B

.grt2:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	add		n = -1, n		C			M I
	;;
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	logop(		r14, r10, r11)		C			M I
	;;
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	mov.i		ar.lc = n		C			I0
	;;
	logop(		r15, r19, r23)		C			M I
	ld8		r19 = [up], 8		C			M
	notormov(	r8, r14)		C			M I
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.Loop			C			B
	br		.Lcj6			C			B

C n mod 4 = 3.  Without p15 the remaining two limb pairs are loaded and the
C wind-down code is entered at .Lcj3.  With p15 the loop is entered at .LL11
C with ar.lc = n/4 - 1.
.Lb11:	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	add		n = -1, n		C			M I
	;;
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	logop(		r15, r10, r11)		C			M I
  (p15)	br.cond.dpnt	.grt3			C			B
	;;

	logop(		r14, r18, r22)		C			M I
	notormov(	r9, r15)		C			M I
	br		.Lcj3			C			B

.grt3:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	;;
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	mov.i		ar.lc = n		C			I0
	;;
	logop(		r14, r18, r22)		C			M I
	ld8		r18 = [up], 8		C			M
	notormov(	r9, r15)		C			M I
	ld8		r22 = [vp], 8		C			M
	br		.LL11			C			B

C The loop is unrolled four times.  Each of the four groups stores one
C finished limb, runs logop on a limb pair loaded earlier, runs notormov on
C the logop result of the previous group, and loads the next limb pair.
C The .LLxx labels are the entry points used by the feed-in code above.
C *** MAIN LOOP START ***
	ALIGN(32)
.Loop:	st8		[rp] = r8, 8		C			M
	logop(		r14, r16, r20)		C			M I
	notormov(	r9, r15)		C			M I
	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	nop.b		0
	;;
.LL01:	st8		[rp] = r9, 8		C			M
	logop(		r15, r17, r21)		C			M I
	notormov(	r8, r14)		C			M I
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	nop.b		0
	;;
.LL00:	st8		[rp] = r8, 8		C			M
	logop(		r14, r18, r22)		C			M I
	notormov(	r9, r15)		C			M I
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	nop.b		0
	;;
.LL11:	st8		[rp] = r9, 8		C			M
	logop(		r15, r19, r23)		C			M I
	notormov(	r8, r14)		C			M I
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.Loop	;;		C			B
C *** MAIN LOOP END ***

C Wind-down code.  Same store, logop and notormov steps as the loop, but no
C further loads.  Entering at .LcjK leaves K limbs still to be stored.
.Lcj6:	st8		[rp] = r8, 8		C			M
	logop(		r14, r16, r20)		C			M I
	notormov(	r9, r15)		C			M I
	;;
.Lcj5:	st8		[rp] = r9, 8		C			M
	logop(		r15, r17, r21)		C			M I
	notormov(	r8, r14)		C			M I
	;;
.Lcj4:	st8		[rp] = r8, 8		C			M
	logop(		r14, r18, r22)		C			M I
	notormov(	r9, r15)		C			M I
	;;
.Lcj3:	st8		[rp] = r9, 8		C			M
	logop(		r15, r19, r23)		C			M I
	notormov(	r8, r14)		C			M I
	;;
.Lcj2:	st8		[rp] = r8, 8		C			M
	notormov(	r9, r15)		C			M I
	;;
C Final store, then restore ar.lc from r2 and return.
.Lcj1:	st8		[rp] = r9, 8		C			M
	mov.i		ar.lc = r2		C			I0
	br.ret.sptk.many b0			C			B
EPILOGUE()
ASM_END()