/*
 * Clip testing in SPARC assembly
 *
 * Syntax: GNU as for SPARC, run through the C preprocessor.
 * Two entry points are exported:
 *
 *   _mesa_sparc_cliptest_points4     computes the per-vertex clip mask and
 *                                    writes the perspective-divided vertex
 *                                    to proj_vec
 *   _mesa_sparc_cliptest_points4_np  computes the per-vertex clip mask only
 *
 * Both use a register window (save/restore), so the frame they allocate
 * must be large enough for a window spill.
 */

#if __arch64__
#define LDPTR		ldx
#define MATH_ASM_PTR_SIZE 8
/* A 64-bit window spill writes 16 eight-byte registers (128 bytes) at the
 * frame base, so a 64-byte frame would overlap the save area of the caller.
 * 176 bytes is the minimum frame: 128 bytes of window save area plus six
 * eight-byte outgoing argument slots.
 */
#define CLIP_FRAME_SIZE	176
#include "math/m_vector_asm.h"
#else
#define LDPTR		ld
#define MATH_ASM_PTR_SIZE 4
/* 32-bit: 16 four-byte registers of window save area.  Nothing called
 * from here touches the stack.
 */
#define CLIP_FRAME_SIZE	64
#include "math/m_vector_asm.h"
#endif

	.register %g2, #scratch
	.register %g3, #scratch

	.text
	.align		64

	/* Constant 1.0f.  It must stay immediately in front of clip_table:
	 * the functions below locate both through one PC-relative address
	 * and step over this word with "add %g1, 0x4, %g1".
	 */
one_dot_zero:
	.word		0x3f800000	/* 1.0f */

	/* This trick is shamelessly stolen from the x86
	 * Mesa asm.  Very clever, and we can do it too
	 * since we have the necessary add with carry
	 * instructions on Sparc.
	 *
	 * The table has 128 entries and is indexed by a 7-bit value that
	 * the loops below assemble from carry bits.  The coordinates are
	 * loaded as raw 32-bit words; shifting a word left by one moves
	 * its top (sign) bit into the carry and leaves the remaining bits,
	 * which are then compared as unsigned integers.  Writing |v| for
	 * those remaining bits of v:
	 *
	 *   bit 6: sign of w
	 *   bit 5: sign of z      bit 4: |w| < |z|
	 *   bit 3: sign of y      bit 2: |w| < |y|
	 *   bit 1: sign of x      bit 0: |w| < |x|
	 *
	 * Each entry is the clip mask byte for that combination.
	 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	32, 33, 32, 34, 36, 37, 36, 38
	.byte	32, 33, 32, 34, 40, 41, 40, 42
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	16, 17, 16, 18, 20, 21, 20, 22
	.byte	16, 17, 16, 18, 24, 25, 24, 26
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	47, 45, 47, 46, 39, 37, 39, 38
	.byte	47, 45, 47, 46, 43, 41, 43, 42
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	31, 29, 31, 30, 23, 21, 23, 22
	.byte	31, 29, 31, 30, 27, 25, 27, 26

/* GLvector4f *clip_vec, GLvector4f *proj_vec, 
   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
   GLboolean viewport_z_enable */

	/* Helper that returns immediately.  The callers only want the
	 * side effect of "call": the address of the call instruction is
	 * left in %o7, which gives them a PC-relative base address.
	 */
	.align		64
__pc_tramp:
	retl
	 nop

	/*
	 * _mesa_sparc_cliptest_points4
	 *
	 * In:   %i0 = clip_vec    %i1 = proj_vec    %i2 = clipMask
	 *       %i3 = orMask      %i4 = andMask
	 *       (the sixth argument, viewport_z_enable, is not consulted;
	 *        %i5 is reused as the output pointer)
	 * Out:  %o0 = proj_vec
	 *       clipMask[i] receives the mask of vertex i
	 *       *orMask     has every vertex mask ORed in
	 *       *andMask    is ANDed with the mask of every clipped vertex
	 *                   and forced to 0 if any vertex was unclipped
	 *       proj_vec    flags |= VEC_SIZE_4, size = 3, count copied;
	 *                   unclipped vertices get (x/w, y/w, z/w, 1/w),
	 *                   clipped vertices get (0, 0, 0, 1)
	 * Uses: %g1-%g5, %f0-%f4, %f8, integer condition codes
	 *
	 * A count of zero leaves clipMask, *orMask and *andMask untouched.
	 */
	.globl	_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	save	%sp, -CLIP_FRAME_SIZE, %sp
	/* %o7 = address of the call; the delay slot turns that into the
	 * address of one_dot_zero.  The sub must stay in the delay slot,
	 * because "." is evaluated at the sub itself (call address + 4).
	 */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	ld	[%g1 + 0x0], %f4		/* f4 = 1.0f */
	add	%g1, 0x4, %g1			/* g1 = clip_table */

	ld	[%i0 + V4F_STRIDE], %l1
	ld	[%i0 + V4F_COUNT], %l3
	LDPTR	[%i0 + V4F_START], %i0
	LDPTR	[%i1 + V4F_START], %i5
	ldub	[%i3], %g2
	ldub	[%i4], %g3
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2

	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]
	st	%l3, [%i1 + V4F_COUNT]
	clr	%l2
	clr	%l0

	/* The loop below tests its exit condition at the bottom, so with
	 * count == 0 it would process one vertex and then keep going until
	 * the counter wraps.  Skip it; the tail at 4: then writes the masks
	 * back unchanged (c == count == 0).
	 */
	cmp	%l3, 0
	be	4f
	 nop

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 * i5: vProj[4][i]
	 */

	/* w is loaded twice: once as a float for the divide, once as raw
	 * bits for the table index.  The divide is started first so that it
	 * overlaps the integer work; its result f8 = 1/w is only consumed
	 * on the unclipped path at 2:.
	 */
1:	ld	[%i0 + 0x0c], %f3	! LSU	Group
	ld	[%i0 + 0x0c], %g5	! LSU	Group
	ld	[%i0 + 0x08], %g4	! LSU	Group
	fdivs	%f4, %f3, %f8		! FGM
	addcc	%g5, %g5, %g5		! IEU1	Group
	addx	%g0, 0x0, %g3		! IEU1	Group
	addcc	%g4, %g4, %g4		! IEU1	Group
	addx	%g3, %g3, %g3		! IEU1	Group
	subcc	%g5, %g4, %g0		! IEU1	Group
	ld	[%i0 + 0x04], %g4	! LSU	Group
	addx	%g3, %g3, %g3		! IEU1	Group
	addcc	%g4, %g4, %g4		! IEU1	Group
	addx	%g3, %g3, %g3		! IEU1	Group
	subcc	%g5, %g4, %g0		! IEU1	Group
	ld	[%i0 + 0x00], %g4	! LSU	Group
	addx	%g3, %g3, %g3		! IEU1	Group
	addcc	%g4, %g4, %g4		! IEU1	Group
	addx	%g3, %g3, %g3		! IEU1	Group
	subcc	%g5, %g4, %g0		! IEU1	Group
	addx	%g3, %g3, %g3		! IEU1	Group
	ldub	[%g1 + %g3], %g3	! LSU	Group
	cmp	%g3, 0			! IEU1	Group, stall
	be	2f			! CTI
	 stb	%g3, [%i2]		! LSU
	/* Clipped vertex: count it, fold its mask into both accumulators
	 * and emit (0, 0, 0, 1).  g4 = (mask << 8) | 0xff, so the final
	 * "and" leaves the OR byte alone and ANDs only the upper byte.
	 */
	sll	%g3, 8, %g4		! IEU1	Group
	add	%l2, 1, %l2		! IEU0
	st	%g0, [%i5 + 0x00]	! LSU
	or	%g4, 0xff, %g4		! IEU0	Group
	or	%g2, %g3, %g2		! IEU1
	st	%g0, [%i5 + 0x04]	! LSU
	and	%g2, %g4, %g2		! IEU0	Group
	st	%g0, [%i5 + 0x08]	! LSU
	b	3f			! CTI
	 st	%f4, [%i5 + 0x0c]	! LSU	Group
	/* Unclipped vertex: scale x, y, z by 1/w and store 1/w as the
	 * fourth component.
	 */
2:	ld	[%i0 + 0x00], %f0	! LSU	Group
	ld	[%i0 + 0x04], %f1	! LSU	Group
	ld	[%i0 + 0x08], %f2	! LSU	Group
	fmuls	%f0, %f8, %f0		! FGM
	st	%f0, [%i5 + 0x00]	! LSU	Group
	fmuls	%f1, %f8, %f1		! FGM
	st	%f1, [%i5 + 0x04]	! LSU	Group
	fmuls	%f2, %f8, %f2		! FGM
	st	%f2, [%i5 + 0x08]	! LSU	Group
	st	%f8, [%i5 + 0x0c]	! LSU	Group
3:	add	%i5, 0x10, %i5		! IEU1
	add	%l0, 1, %l0		! IEU0	Group
	add	%i2, 1, %i2		! IEU0	Group
	cmp	%l0, %l3		! IEU1	Group
	bne	1b			! CTI
	 add	%i0, %l1, %i0		! IEU0	Group
	/* Write the accumulators back.  The annulled branch runs its delay
	 * slot only when taken, so andMask becomes 0 exactly when c < count.
	 */
4:	stb	%g2, [%i3]		! LSU
	srl	%g2, 8, %g3		! IEU0	Group
	cmp	%l2, %l3		! IEU1	Group
	bl,a	1f			! CTI
	 clr	%g3			! IEU0
1:	stb	%g3, [%i4]		! LSU	Group
	ret				! CTI	Group
	 restore %i1, 0x0, %o0

	/*
	 * _mesa_sparc_cliptest_points4_np
	 *
	 * Same mask computation as _mesa_sparc_cliptest_points4, without
	 * the perspective divide: proj_vec is neither read nor written.
	 *
	 * In:   %i0 = clip_vec    %i1 = proj_vec    %i2 = clipMask
	 *       %i3 = orMask      %i4 = andMask
	 * Out:  %o0 = proj_vec (passed through)
	 *       clipMask[i], *orMask, *andMask updated as described for
	 *       _mesa_sparc_cliptest_points4
	 * Uses: %g1-%g5, integer condition codes
	 *
	 * A count of zero leaves clipMask, *orMask and *andMask untouched.
	 */
	.globl	_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	save	%sp, -CLIP_FRAME_SIZE, %sp

	/* PC-relative address of one_dot_zero; the sub must stay in the
	 * delay slot of the call.
	 */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	add	%g1, 0x4, %g1			/* g1 = clip_table */

	ld	[%i0 + V4F_STRIDE], %l1
	ld	[%i0 + V4F_COUNT], %l3
	LDPTR	[%i0 + V4F_START], %i0
	ldub	[%i3], %g2
	ldub	[%i4], %g3
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2

	clr	%l2
	clr	%l0

	/* Bottom-tested loop: skip it for an empty vector, otherwise it
	 * would run until the counter wraps.
	 */
	cmp	%l3, 0
	be	4f
	 nop

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 */

1:	ld	[%i0 + 0x0c], %g5	! LSU	Group
	ld	[%i0 + 0x08], %g4	! LSU	Group
	addcc	%g5, %g5, %g5		! IEU1	Group
	addx	%g0, 0x0, %g3		! IEU1	Group
	addcc	%g4, %g4, %g4		! IEU1	Group
	addx	%g3, %g3, %g3		! IEU1	Group
	subcc	%g5, %g4, %g0		! IEU1	Group
	ld	[%i0 + 0x04], %g4	! LSU	Group
	addx	%g3, %g3, %g3		! IEU1	Group
	addcc	%g4, %g4, %g4		! IEU1	Group
	addx	%g3, %g3, %g3		! IEU1	Group
	subcc	%g5, %g4, %g0		! IEU1	Group
	ld	[%i0 + 0x00], %g4	! LSU	Group
	addx	%g3, %g3, %g3		! IEU1	Group
	addcc	%g4, %g4, %g4		! IEU1	Group
	addx	%g3, %g3, %g3		! IEU1	Group
	subcc	%g5, %g4, %g0		! IEU1	Group
	addx	%g3, %g3, %g3		! IEU1	Group
	ldub	[%g1 + %g3], %g3	! LSU	Group
	cmp	%g3, 0			! IEU1	Group, stall
	be	2f			! CTI
	 stb	%g3, [%i2]		! LSU
	/* Clipped vertex: count it and fold its mask into the OR byte
	 * (low) and the AND byte (high) of g2.
	 */
	sll	%g3, 8, %g4		! IEU1	Group
	add	%l2, 1, %l2		! IEU0
	or	%g4, 0xff, %g4		! IEU0	Group
	or	%g2, %g3, %g2		! IEU1
	and	%g2, %g4, %g2		! IEU0	Group
2:	add	%l0, 1, %l0		! IEU0	Group
	add	%i2, 1, %i2		! IEU0	Group
	cmp	%l0, %l3		! IEU1	Group
	bne	1b			! CTI
	 add	%i0, %l1, %i0		! IEU0	Group
	/* Write the accumulators back; andMask is zeroed when c < count. */
4:	stb	%g2, [%i3]		! LSU
	srl	%g2, 8, %g3		! IEU0	Group
	cmp	%l2, %l3		! IEU1	Group
	bl,a	1f			! CTI
	 clr	%g3			! IEU0
1:	stb	%g3, [%i4]		! LSU	Group
	ret				! CTI	Group
	 restore %i1, 0x0, %o0