dnl  IA-64 mpn_sec_tabselect.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C           cycles/limb
C Itanium:       ?
C Itanium 2:     2.5

C NOTES
C  * Using software pipelining could trivially yield 2 c/l without unrolling,
C    or 1+epsilon with unrolling.  (This code was modelled after the powerpc64
C    code, for simplicity.)
C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
C
C Copy entry number which (counting from 0) of the table at tp to rp.  The
C table consists of nents entries of n limbs each, stored back to back.
C
C Every entry is read and every limb of rp is rewritten once per entry, so the
C sequence of loads and stores is the same for every value of which.  The
C selection itself is done with an all-ones or all-zeros mask:
C
C	rp[i] = (tp[i] & mask) | (rp[i] & ~mask)
C
C Operands are not checked; the loop counts below only make sense for
C n >= 1 and nents >= 1.
C
C Register usage beyond the incoming arguments:
C	r2	copy of the incoming ar.lc, put back before returning
C	r3	inner loop count, (n-2) >> 1, loaded into ar.lc for each entry
C	r8	mask
C	r9	n mod 2
C	r14,r15	second rp/tp pointer pair, one limb ahead of rp1/tp1
C	r16-r19	limb data
C	p6,p7	current entry is / is not the selected one
C	p8	n is even
C	p9	more entries remain
C	p10	n is 1
C
C ar.lc is a preserved register in the IA-64 software conventions, so it is
C saved in r2 (matching the .save directive) and restored on exit.  The loop
C count is kept in scratch register r3; r4-r7 are preserved registers and
C must not be used as temporaries without saving them.

define(`rp',	`r32')
define(`tp',	`r33')
define(`n',	`r34')
define(`nents',	`r35')
define(`which',	`r36')

define(`mask',	`r8')

define(`rp1',	`r32')
define(`tp1',	`r33')
define(`rp2',	`r14')
define(`tp2',	`r15')

ASM_START()
PROLOGUE(mpn_sec_tabselect)
	.prologue
	.save	ar.lc, r2
	.body
ifdef(`HAVE_ABI_32',`
 {.mmi;	addp4	rp = 0, rp		C			M I
	addp4	tp = 0, tp		C			M I
	zxt4	n = n			C			I
}{.mii;	nop	0
	zxt4	nents = nents		C			I
	zxt4	which = which		C			I
	;;
}')
 {.mmi;	add	rp2 = 8, rp1		C second store pointer, one limb ahead
	add	tp2 = 8, tp1		C second load pointer, one limb ahead
	mov.i	r2 = ar.lc		C save the incoming ar.lc	I
}{.mmi;	add	r3 = -2, n
	and	r9 = 1, n		C r9 = n mod 2
	cmp.eq	p10, p0 = 1, n		C p10 set when n is 1
	;;
}{.mmi;	cmp.eq	p8, p0 = 0, r9		C p8 set when n is even
	sub	which = nents, which	C matched against the nents countdown
	shr.u	r3 = r3, 1		C inner loop count
}{.mmi;	nop	0
	nop	0
	shl	n = n, 3		C n is a byte count from here on
	;;
}
L(outer):
 {.mmi;	cmp.eq	p6, p7 = which, nents	C are we at the selected table entry?
	nop	0
	mov.i	ar.lc = r3		C			I0
	;;
}{.mmb;
 (p6)	mov	mask = -1
 (p7)	mov	mask = 0
 (p8)	br.dptk	L(top)			C branch to loop entry if n even
	;;
}
C n is odd: handle one limb here so that the 2-way loop sees an even count.
 {.mmi;	ld8	r16 = [tp1], 8
	add	tp2 = 8, tp2
	nop	0
	;;
}{.mmi;	ld8	r18 = [rp1]
	and	r16 = r16, mask
	nop	0
	;;
}{.mmi;	andcm	r18 = r18, mask
	;;
	or	r16 = r16, r18
	nop	0
	;;
}{.mmb;	st8	[rp1] = r16, 8
	add	rp2 = 8, rp2
 (p10)	br.dpnt	L(end)			C n is 1, nothing left for the loop
}
	ALIGN(32)
C Two limbs per iteration: rp1/tp1 and rp2/tp2 each step by 16 bytes.
L(top):
 {.mmi;	ld8	r16 = [tp1], 16
	ld8	r17 = [tp2], 16
	nop	0
	;;
}{.mmi;	ld8	r18 = [rp1]
	and	r16 = r16, mask
	nop	0
}{.mmi;	ld8	r19 = [rp2]
	and	r17 = r17, mask
	nop	0
	;;
}{.mmi;	andcm	r18 = r18, mask
	andcm	r19 = r19, mask
	nop	0
	;;
}{.mmi;	or	r16 = r16, r18
	or	r17 = r17, r19
	nop	0
	;;
}{.mmb;	st8	[rp1] = r16, 16
	st8	[rp2] = r17, 16
	br.cloop.dptk	L(top)
	;;
}
C The tp pointers are left alone and so already address the next entry.
L(end):
 {.mmi;	sub	rp1 = rp1, n		C move rp back to beginning
	sub	rp2 = rp2, n		C move rp back to beginning
	cmp.ne	p9, p0 = 1, nents	C p9 set while entries remain
}{.mmb;	add	nents = -1, nents
	nop	0
 (p9)	br.dptk	L(outer)
	;;
}{.mib;	nop	0
	mov.i	ar.lc = r2		C restore the incoming ar.lc	I0
	br.ret.sptk.many b0
}
EPILOGUE()