//
// d_polysa.s
// x86 assembly-language polygon model drawing code
//

#include "qasm.h"
#include "d_ifacea.h"

#if	id386

// !!! if this is changed, it must be changed in d_polyse.c too !!!
#define DPS_MAXSPANS			MAXHEIGHT+1
									// 1 extra for spanpackage that marks end

//#define	SPAN_SIZE	(((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
#define SPAN_SIZE (1024+1+1+1)*32



	.data

	.align	4

// Scratch floats. p10_minus_p20 and p01_minus_p21 are written and re-read
// by the (currently disabled) gradient routine below; temp0, temp1 and
// Ltemp are not referenced anywhere in this file.
p10_minus_p20:	.single		0
p01_minus_p21:	.single		0
temp0:			.single		0
temp1:			.single		0
Ltemp:			.single		0

// Entry points into the unrolled eight-pixel span loop, indexed by
// (-count) & 7: index 0 enters at the first pixel slot (LDraw8), index 1
// at the second (LDraw7), and so on.
aff8entryvec_table:	.long	LDraw8, LDraw7, LDraw6, LDraw5
					.long	LDraw4, LDraw3, LDraw2, LDraw1

// r_zistepx rotated by 16 bits; written once per call by the span drawer.
lzistepx:		.long	0


	.text

#ifndef NeXT
	.extern C(D_PolysetSetEdgeTable)
	.extern C(D_RasterizeAliasPolySmooth)
#endif

//----------------------------------------------------------------------
// affine triangle gradient calculation code
//----------------------------------------------------------------------

// NOTE: everything from here to the matching #endif is compiled out.

#if 0
// The single stack argument is read directly off %esp (no registers are
// pushed by this routine).
#define skinwidth	4+0

.globl C(R_PolysetCalcGradients)
C(R_PolysetCalcGradients):

// Stack diagrams below list %st(0) first.  Abbreviations:
//   pN[i]  = r_pN[i] converted to floating point
//   p00    = p00_minus_p20 = r_p0[0] - r_p2[0]
//   p01    = p01_minus_p21 = r_p0[1] - r_p2[1]
//   p10    = p10_minus_p20 = r_p1[0] - r_p2[0]
//   p11    = p11_minus_p21 = r_p1[1] - r_p2[1]
//   xinv   = xstepdenominv,  yinv = ystepdenominv = -xinv
//   nx     = t1*p01 - t0*p11   (numerator of the x step)
//   ny     = t1*p00 - t0*p10   (numerator of the y step)
//   tail   = yinv | xinv | p00 | p11
//
// With a %st(0) source and a %st(i) destination, the subtract forms used
// here give:
//   fsubp  %st(0),%st(i)   ->  st(i) = st(0) - st(i), then pop
//   fsubrp %st(0),%st(i)   ->  st(i) = st(i) - st(0), then pop

//	p00_minus_p20 = r_p0[0] - r_p2[0];
//	p01_minus_p21 = r_p0[1] - r_p2[1];
//	p10_minus_p20 = r_p1[0] - r_p2[0];
//	p11_minus_p21 = r_p1[1] - r_p2[1];
//
//	xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
//			     p00_minus_p20 * p11_minus_p21);
//
//	ystepdenominv = -xstepdenominv;
//
// The reciprocal is not computed here; C(d_xdenom) is loaded in its place.

	fildl	C(r_p0)+0			// p0[0]
	fildl	C(r_p2)+0			// p2[0] | p0[0]
	fildl	C(r_p0)+4			// p0[1] | p2[0] | p0[0]
	fildl	C(r_p2)+4			// p2[1] | p0[1] | p2[0] | p0[0]
	fildl	C(r_p1)+0			// p1[0] | p2[1] | p0[1] | p2[0] | p0[0]
	fildl	C(r_p1)+4			// p1[1] | p1[0] | p2[1] | p0[1] | p2[0] | p0[0]
	fxch	%st(3)				// p0[1] | p1[0] | p2[1] | p1[1] | p2[0] | p0[0]
	fsub	%st(2),%st(0)		// p01 | p1[0] | p2[1] | p1[1] | p2[0] | p0[0]
	fxch	%st(1)				// p1[0] | p01 | p2[1] | p1[1] | p2[0] | p0[0]
	fsub	%st(4),%st(0)		// p10 | p01 | p2[1] | p1[1] | p2[0] | p0[0]
	fxch	%st(5)				// p0[0] | p01 | p2[1] | p1[1] | p2[0] | p10
	fsubp	%st(0),%st(4)		// p01 | p2[1] | p1[1] | p00 | p10
	fxch	%st(2)				// p1[1] | p2[1] | p01 | p00 | p10
	fsubp	%st(0),%st(1)		// p11 | p01 | p00 | p10
	fxch	%st(1)				// p01 | p11 | p00 | p10
	flds	C(d_xdenom)			// xinv | p01 | p11 | p00 | p10
	fxch	%st(4)				// p10 | p01 | p11 | p00 | xinv
	fstps	p10_minus_p20		// p01 | p11 | p00 | xinv
	fstps	p01_minus_p21		// p11 | p00 | xinv
	fxch	%st(2)				// xinv | p00 | p11

//// ceil () for light so positive steps are exaggerated, negative steps
//// diminished, pushing us away from underflow toward overflow. Underflow is
//// very visible, overflow is very unlikely, because of ambient lighting
//	t0 = r_p0[4] - r_p2[4];
//	t1 = r_p1[4] - r_p2[4];

	fildl	C(r_p2)+16			// p2[4] | xinv | p00 | p11
	fildl	C(r_p0)+16			// p0[4] | p2[4] | xinv | p00 | p11
	fildl	C(r_p1)+16			// p1[4] | p0[4] | p2[4] | xinv | p00 | p11
	fxch	%st(2)				// p2[4] | p0[4] | p1[4] | xinv | p00 | p11
	fld		%st(0)				// p2[4] | p2[4] | p0[4] | p1[4] | xinv | p00 | p11
	fsubrp	%st(0),%st(2)		// p2[4] | t0 | p1[4] | xinv | p00 | p11
	fsubrp	%st(0),%st(2)		// t0 | t1 | xinv | p00 | p11

//	r_lstepx = (int)
//			ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
//	r_lstepy = (int)
//			ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);

	fld		%st(0)				// t0 | t0 | t1 | xinv | p00 | p11
	fmul	%st(5),%st(0)		// t0*p11 | t0 | t1 | xinv | p00 | p11
	fxch	%st(2)				// t1 | t0 | t0*p11 | xinv | p00 | p11
	fld		%st(0)				// t1 | t1 | t0 | t0*p11 | xinv | p00 | p11
	fmuls	p01_minus_p21		// t1*p01 | t1 | t0 | t0*p11 | xinv | p00 | p11
	fxch	%st(2)				// t0 | t1 | t1*p01 | t0*p11 | xinv | p00 | p11
	fmuls	p10_minus_p20		// t0*p10 | t1 | t1*p01 | t0*p11 | xinv | p00 | p11
	fxch	%st(1)				// t1 | t0*p10 | t1*p01 | t0*p11 | xinv | p00 | p11
	fmul	%st(5),%st(0)		// t1*p00 | t0*p10 | t1*p01 | t0*p11 | xinv | p00 | p11
	fxch	%st(2)				// t1*p01 | t0*p10 | t1*p00 | t0*p11 | xinv | p00 | p11
	fsubp	%st(0),%st(3)		// t0*p10 | t1*p00 | nx | xinv | p00 | p11
	fsubrp	%st(0),%st(1)		// ny | nx | xinv | p00 | p11
	fld		%st(2)				// xinv | ny | nx | xinv | p00 | p11
	fmuls	float_minus_1		// yinv | ny | nx | xinv | p00 | p11
	fxch	%st(2)				// nx | ny | yinv | xinv | p00 | p11
	fmul	%st(3),%st(0)		// nx*xinv | ny | yinv | xinv | p00 | p11
	fxch	%st(1)				// ny | nx*xinv | yinv | xinv | p00 | p11
	fmul	%st(2),%st(0)		// ny*yinv | nx*xinv | yinv | xinv | p00 | p11
	fldcw	ceil_cw				// ceil_cw is defined outside this file; presumably
								//  it selects round-up for the two stores below
	fistpl	C(r_lstepy)			// nx*xinv | yinv | xinv | p00 | p11
	fistpl	C(r_lstepx)			// yinv | xinv | p00 | p11   ( = tail )
	fldcw	single_cw			// switch to the control word used for the
								//  remaining integer stores

//	t0 = r_p0[2] - r_p2[2];
//	t1 = r_p1[2] - r_p2[2];

	fildl	C(r_p2)+8			// p2[2] | tail
	fildl	C(r_p0)+8			// p0[2] | p2[2] | tail
	fildl	C(r_p1)+8			// p1[2] | p0[2] | p2[2] | tail
	fxch	%st(2)				// p2[2] | p0[2] | p1[2] | tail
	fld		%st(0)				// p2[2] | p2[2] | p0[2] | p1[2] | tail
	fsubrp	%st(0),%st(2)		// p2[2] | t0 | p1[2] | tail
	fsubrp	%st(0),%st(2)		// t0 | t1 | tail

//	r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);

	fld		%st(0)				// t0 | t0 | t1 | tail
	fmul	%st(6),%st(0)		// t0*p11 | t0 | t1 | tail
	fxch	%st(2)				// t1 | t0 | t0*p11 | tail
	fld		%st(0)				// t1 | t1 | t0 | t0*p11 | tail
	fmuls	p01_minus_p21		// t1*p01 | t1 | t0 | t0*p11 | tail
	fxch	%st(2)				// t0 | t1 | t1*p01 | t0*p11 | tail
	fmuls	p10_minus_p20		// t0*p10 | t1 | t1*p01 | t0*p11 | tail
	fxch	%st(1)				// t1 | t0*p10 | t1*p01 | t0*p11 | tail
	fmul	%st(6),%st(0)		// t1*p00 | t0*p10 | t1*p01 | t0*p11 | tail
	fxch	%st(2)				// t1*p01 | t0*p10 | t1*p00 | t0*p11 | tail
	fsubp	%st(0),%st(3)		// t0*p10 | t1*p00 | nx | tail
	fsubrp	%st(0),%st(1)		// ny | nx | tail
	fmul	%st(2),%st(0)		// ny*yinv | nx | tail
	fxch	%st(1)				// nx | ny*yinv | tail
	fmul	%st(3),%st(0)		// nx*xinv | ny*yinv | tail
	fxch	%st(1)				// ny*yinv | nx*xinv | tail
	fistpl	C(r_sstepy)			// nx*xinv | tail
	fistpl	C(r_sstepx)			// tail

//	t0 = r_p0[3] - r_p2[3];
//	t1 = r_p1[3] - r_p2[3];

	fildl	C(r_p2)+12			// p2[3] | tail
	fildl	C(r_p0)+12			// p0[3] | p2[3] | tail
	fildl	C(r_p1)+12			// p1[3] | p0[3] | p2[3] | tail
	fxch	%st(2)				// p2[3] | p0[3] | p1[3] | tail
	fld		%st(0)				// p2[3] | p2[3] | p0[3] | p1[3] | tail
	fsubrp	%st(0),%st(2)		// p2[3] | t0 | p1[3] | tail
	fsubrp	%st(0),%st(2)		// t0 | t1 | tail

//	r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);

	fld		%st(0)				// t0 | t0 | t1 | tail
	fmul	%st(6),%st(0)		// t0*p11 | t0 | t1 | tail
	fxch	%st(2)				// t1 | t0 | t0*p11 | tail
	fld		%st(0)				// t1 | t1 | t0 | t0*p11 | tail
	fmuls	p01_minus_p21		// t1*p01 | t1 | t0 | t0*p11 | tail
	fxch	%st(2)				// t0 | t1 | t1*p01 | t0*p11 | tail
	fmuls	p10_minus_p20		// t0*p10 | t1 | t1*p01 | t0*p11 | tail
	fxch	%st(1)				// t1 | t0*p10 | t1*p01 | t0*p11 | tail
	fmul	%st(6),%st(0)		// t1*p00 | t0*p10 | t1*p01 | t0*p11 | tail
	fxch	%st(2)				// t1*p01 | t0*p10 | t1*p00 | t0*p11 | tail
	fsubp	%st(0),%st(3)		// t0*p10 | t1*p00 | nx | tail
	fsubrp	%st(0),%st(1)		// ny | nx | tail
	fmul	%st(2),%st(0)		// ny*yinv | nx | tail
	fxch	%st(1)				// nx | ny*yinv | tail
	fmul	%st(3),%st(0)		// nx*xinv | ny*yinv | tail
	fxch	%st(1)				// ny*yinv | nx*xinv | tail
	fistpl	C(r_tstepy)			// nx*xinv | tail
	fistpl	C(r_tstepx)			// tail

//	t0 = r_p0[5] - r_p2[5];
//	t1 = r_p1[5] - r_p2[5];

	fildl	C(r_p2)+20			// p2[5] | tail
	fildl	C(r_p0)+20			// p0[5] | p2[5] | tail
	fildl	C(r_p1)+20			// p1[5] | p0[5] | p2[5] | tail
	fxch	%st(2)				// p2[5] | p0[5] | p1[5] | tail
	fld		%st(0)				// p2[5] | p2[5] | p0[5] | p1[5] | tail
	fsubrp	%st(0),%st(2)		// p2[5] | t0 | p1[5] | tail
	fsubrp	%st(0),%st(2)		// t0 | t1 | tail

//	r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);
//
// Last use of the cached values, so the popping multiply forms consume
// p11, p00, xinv and yinv in place and the FPU stack ends up empty.

	fld		%st(0)				// t0 | t0 | t1 | yinv | xinv | p00 | p11
	fmulp	%st(0),%st(6)		// t0 | t1 | yinv | xinv | p00 | t0*p11
	fxch	%st(1)				// t1 | t0 | yinv | xinv | p00 | t0*p11
	fld		%st(0)				// t1 | t1 | t0 | yinv | xinv | p00 | t0*p11
	fmuls	p01_minus_p21		// t1*p01 | t1 | t0 | yinv | xinv | p00 | t0*p11
	fxch	%st(2)				// t0 | t1 | t1*p01 | yinv | xinv | p00 | t0*p11
	fmuls	p10_minus_p20		// t0*p10 | t1 | t1*p01 | yinv | xinv | p00 | t0*p11
	fxch	%st(1)				// t1 | t0*p10 | t1*p01 | yinv | xinv | p00 | t0*p11
	fmulp	%st(0),%st(5)		// t0*p10 | t1*p01 | yinv | xinv | t1*p00 | t0*p11
	fxch	%st(5)				// t0*p11 | t1*p01 | yinv | xinv | t1*p00 | t0*p10
	fsubrp	%st(0),%st(1)		// nx | yinv | xinv | t1*p00 | t0*p10
	fxch	%st(3)				// t1*p00 | yinv | xinv | nx | t0*p10
	fsubp	%st(0),%st(4)		// yinv | xinv | nx | ny
	fxch	%st(1)				// xinv | yinv | nx | ny
	fmulp	%st(0),%st(2)		// yinv | nx*xinv | ny
	fmulp	%st(0),%st(2)		// nx*xinv | ny*yinv
	fistpl	C(r_zistepx)		// ny*yinv
	fistpl	C(r_zistepy)		// (empty)

//	a_sstepxfrac = r_sstepx << 16;
//	a_tstepxfrac = r_tstepx << 16;
//
//	a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
//			(r_sstepx >> 16);
//
// The skin width used by the multiply is the stack argument, not a field
// read from r_affinetridesc.

	movl	C(r_sstepx),%eax
	movl	C(r_tstepx),%edx
	shll	$16,%eax			// keep only the fractional 16 bits, moved to
	shll	$16,%edx			//  the top of the register
	movl	%eax,C(a_sstepxfrac)
	movl	%edx,C(a_tstepxfrac)

	movl	C(r_sstepx),%ecx
	movl	C(r_tstepx),%eax
	sarl	$16,%ecx			// whole part of the s step
	sarl	$16,%eax			// whole part of the t step
	imull	skinwidth(%esp)		// %edx:%eax = %eax * skinwidth argument
	addl	%ecx,%eax
	movl	%eax,C(a_ststepxwhole)

	ret

#endif

//----------------------------------------------------------------------
// recursive subdivision affine triangle drawing code
//
// not C-callable because of stdcall return
//----------------------------------------------------------------------

// Argument offsets: 4 bytes of return address plus the 16 bytes taken by
// the four register pushes in the prologue.
#define lp1	4+16
#define lp2	8+16
#define lp3	12+16

// Register roles for the whole routine:
//   %ebx = lp1, %esi = lp2, %edi = lp3
//   %eax, %ecx, %edx, %ebp = scratch
// The routine removes its own three arguments on return (ret $12).

.globl C(D_PolysetRecursiveTriangle)
C(D_PolysetRecursiveTriangle):
	pushl	%ebp				// caller frame pointer (used as scratch here)
	pushl	%esi				// callee-saved registers
	pushl	%edi
	pushl	%ebx

//	int		*temp;
//	int		d;
//	int		new[6];
//	int		i;
//	int		z;
//	short	*zbuf;
	movl	lp2(%esp),%esi
	movl	lp1(%esp),%ebx
	movl	lp3(%esp),%edi

// Each range check below computes d+1 and does one unsigned compare
// against 2: the branch is taken exactly when d < -1 or d > 1.
// The loads for the following check are interleaved with the current one.

//	d = lp2[0] - lp1[0];
//	if (d < -1 || d > 1)
//		goto split;
	movl	0(%esi),%eax

	movl	0(%ebx),%edx
	movl	4(%esi),%ebp

	subl	%edx,%eax
	movl	4(%ebx),%ecx

	subl	%ecx,%ebp			// lp2[1] - lp1[1], tested after the x check
	incl	%eax

	cmpl	$2,%eax
	ja		LSplit

//	d = lp2[1] - lp1[1];
//	if (d < -1 || d > 1)
//		goto split;
	movl	0(%edi),%eax		// lp3[0], for the next check
	incl	%ebp

	cmpl	$2,%ebp
	ja		LSplit

//	d = lp3[0] - lp2[0];
//	if (d < -1 || d > 1)
//		goto split2;
	movl	0(%esi),%edx
	movl	4(%edi),%ebp

	subl	%edx,%eax
	movl	4(%esi),%ecx

	subl	%ecx,%ebp			// lp3[1] - lp2[1]
	incl	%eax

	cmpl	$2,%eax
	ja		LSplit2

//	d = lp3[1] - lp2[1];
//	if (d < -1 || d > 1)
//		goto split2;
	movl	0(%ebx),%eax		// lp1[0], for the next check
	incl	%ebp

	cmpl	$2,%ebp
	ja		LSplit2

//	d = lp1[0] - lp3[0];
//	if (d < -1 || d > 1)
//		goto split3;
	movl	0(%edi),%edx
	movl	4(%ebx),%ebp

	subl	%edx,%eax
	movl	4(%edi),%ecx

	subl	%ecx,%ebp			// lp1[1] - lp3[1]
	incl	%eax

	incl	%ebp
	movl	%ebx,%edx			// temp = lp1, consumed at LSplit3

	cmpl	$2,%eax
	ja		LSplit3

//	d = lp1[1] - lp3[1];
//	if (d < -1 || d > 1)
//	{
//split3:
//		temp = lp1;
//		lp1 = lp3;
//		lp3 = lp2;
//		lp2 = temp;
//		goto split;
//	}
//
//	return;			// entire tri is filled
//
	cmpl	$2,%ebp
	jna		LDone

LSplit3:
	movl	%edi,%ebx			// lp1 = lp3
	movl	%esi,%edi			// lp3 = lp2
	movl	%edx,%esi			// lp2 = temp (the old lp1)
	jmp		LSplit

//split2:
LSplit2:

//	temp = lp1;
//	lp1 = lp2;
//	lp2 = lp3;
//	lp3 = temp;
	movl	%ebx,%eax
	movl	%esi,%ebx
	movl	%edi,%esi
	movl	%eax,%edi

//split:
LSplit:

	subl	$24,%esp			// room for new[6] on the stack

//// split this edge
//	new[0] = (lp1[0] + lp2[0]) >> 1;
//	new[1] = (lp1[1] + lp2[1]) >> 1;
//	new[2] = (lp1[2] + lp2[2]) >> 1;
//	new[3] = (lp1[3] + lp2[3]) >> 1;
//	new[5] = (lp1[5] + lp2[5]) >> 1;
//
// new[4] is never written.  On exit from this sequence %ecx still holds
// lp1[0] and %eax still holds lp1[1]; the leading-edge test relies on both.
	movl	8(%ebx),%eax

	movl	8(%esi),%edx
	movl	12(%ebx),%ecx

	addl	%edx,%eax
	movl	12(%esi),%edx

	sarl	$1,%eax
	addl	%edx,%ecx

	movl	%eax,8(%esp)		// new[2]
	movl	20(%ebx),%eax

	sarl	$1,%ecx
	movl	20(%esi),%edx

	movl	%ecx,12(%esp)		// new[3]
	addl	%edx,%eax

	movl	0(%ebx),%ecx
	movl	0(%esi),%edx

	sarl	$1,%eax
	addl	%ecx,%edx

	movl	%eax,20(%esp)		// new[5]
	movl	4(%ebx),%eax

	sarl	$1,%edx
	movl	4(%esi),%ebp

	movl	%edx,0(%esp)		// new[0]
	addl	%eax,%ebp

	sarl	$1,%ebp
	movl	%ebp,4(%esp)		// new[1]

//// draw the point if splitting a leading edge
//	if (lp2[1] > lp1[1])
//		goto nodraw;
	cmpl	%eax,4(%esi)
	jg		LNoDraw

//	if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
//		goto nodraw;
	movl	0(%esi),%edx		// movl leaves the flags from the cmpl intact
	jnz		LDraw				// lp2[1] != lp1[1]

	cmpl	%ecx,%edx
	jl		LNoDraw

LDraw:

// z = new[5] >> 16;
	movl	20(%esp),%edx
	movl	4(%esp),%ecx

	sarl	$16,%edx
	movl	0(%esp),%ebp

// zbuf = zspantable[new[1]] + new[0];
	movl	C(zspantable)(,%ecx,4),%eax

// if (z >= *zbuf)
// {
	cmpw	(%eax,%ebp,2),%dx
	jnge	LNoDraw

//		int		pix;
//
//		*zbuf = z;
	movw	%dx,(%eax,%ebp,2)

//		pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
	movl	12(%esp),%eax

	sarl	$16,%eax
	movl	8(%esp),%edx

	sarl	$16,%edx
	subl	%ecx,%ecx			// clear %ecx so %cl can serve as a byte index

	movl	C(skintable)(,%eax,4),%eax
	movl	4(%esp),%ebp

	movb	(%eax,%edx,),%cl
	movl	C(d_pcolormap),%edx

	movb	(%edx,%ecx,),%dl
	movl	0(%esp),%ecx

//		d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
	movl	C(d_scantable)(,%ebp,4),%eax
	addl	%eax,%ecx
	movl	C(d_viewbuffer),%eax
	movb	%dl,(%eax,%ecx,1)

// }
//
//nodraw:
LNoDraw:

//// recursively continue
//	D_PolysetRecursiveTriangle (lp3, lp1, new);
	pushl	%esp				// address of new[] (value of %esp before the push)
	pushl	%ebx
	pushl	%edi
	call	C(D_PolysetRecursiveTriangle)

//	D_PolysetRecursiveTriangle (lp3, new, lp2);
	movl	%esp,%ebx			// the callee popped its arguments, so %esp
								//  points at new[] again
	pushl	%esi
	pushl	%ebx
	pushl	%edi
	call	C(D_PolysetRecursiveTriangle)
	addl	$24,%esp			// release new[]

LDone:
	popl	%ebx				// restore callee-saved registers
	popl	%edi
	popl	%esi
	popl	%ebp
	ret		$12					// return and discard the three arguments


//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for affine polygons, with smooth
// shading and no transparency
//----------------------------------------------------------------------

// Argument offset: 4 bytes of return address plus the two registers that
// have been pushed at the point where the argument is read.
#define pspans	4+8

// D_PolysetAff8Start and D_PolysetAff8End bracket the code whose
// instructions are rewritten by D_Aff8Patch.
.globl C(D_PolysetAff8Start)
C(D_PolysetAff8Start):

.globl C(R_PolysetDrawSpans8_Opaque)
C(R_PolysetDrawSpans8_Opaque):
	pushl	%esi				// callee-saved registers
	pushl	%ebx

	movl	pspans(%esp),%esi	// first span descriptor
	movl	C(r_zistepx),%ecx

	pushl	%ebp
	pushl	%edi

	rorl	$16,%ecx			// swap halves: high 16 bits of the 1/z step
								//  end up in the low word
	movl	spanpackage_t_count(%esi),%edx

	movl	%ecx,lzistepx

// At the top of each iteration %esi points at the current span and %edx
// holds its count field.
LSpanLoop:

//		lcount = d_aspancount - pspanpackage->count;
//
//		errorterm += erroradjustup;
//		if (errorterm >= 0)
//		{
//			d_aspancount += d_countextrastep;
//			errorterm -= erroradjustdown;
//		}
//		else
//		{
//			d_aspancount += ubasestep;
//		}
	movl	C(d_aspancount),%eax
	subl	%edx,%eax			// %eax = lcount

	movl	C(erroradjustup),%edx
	movl	C(errorterm),%ebx
	addl	%edx,%ebx
	js		LNoTurnover

	movl	C(erroradjustdown),%edx
	movl	C(d_countextrastep),%edi
	subl	%edx,%ebx
	movl	C(d_aspancount),%ebp
	movl	%ebx,C(errorterm)
	addl	%edi,%ebp
	movl	%ebp,C(d_aspancount)
	jmp		LRightEdgeStepped

LNoTurnover:
	movl	C(d_aspancount),%edi
	movl	C(ubasestep),%edx
	movl	%ebx,C(errorterm)
	addl	%edx,%edi
	movl	%edi,C(d_aspancount)

LRightEdgeStepped:
	cmpl	$1,%eax

	jl		LNextSpan			// lcount < 1: nothing to draw
	jz		LExactlyOneLong		// lcount == 1: dedicated short path

//
// set up advancetable
//
// The pixel code indexes advancetable with -1 or 0 relative to
// advancetable+4, so entry 0 is used when the t fraction carried and
// entry 1 when it did not.
	movl	C(a_ststepxwhole),%ecx
	movl	C(r_affinetridesc)+atd_skinwidth,%edx

	movl	%ecx,advancetable+4	// advance base in t
	addl	%edx,%ecx

	movl	%ecx,advancetable	// advance extra in t
	movl	C(a_tstepxfrac),%ecx

	movw	C(r_lstepx),%cx		// low word of the combined step = light step
	movl	%eax,%edx			// count

	movl	%ecx,tstep
	addl	$7,%edx

	shrl	$3,%edx				// count of full and partial loops
	movl	spanpackage_t_sfrac(%esi),%ebx

	movw	%dx,%bx				// loop count lives in the low word of %ebx
	movl	spanpackage_t_pz(%esi),%ecx

	negl	%eax

	movl	spanpackage_t_pdest(%esi),%edi
	andl	$7,%eax				// 0->0, 1->7, 2->6, ... , 7->1

	subl	%eax,%edi			// compensate for hardwired offsets:
	subl	%eax,%ecx			//  one byte per pixel in the destination,

	subl	%eax,%ecx			//  two bytes per pixel in the z-buffer
	movl	spanpackage_t_tfrac(%esi),%edx

	movw	spanpackage_t_light(%esi),%dx
	movl	spanpackage_t_zi(%esi),%ebp

	rorl	$16,%ebp			// put high 16 bits of 1/z in low word
	pushl	%esi				// span pointer is restored after the loop

	movl	spanpackage_t_ptex(%esi),%esi
	jmp		*aff8entryvec_table(,%eax,4)	// enter the unrolled loop part-way

// Register and memory state inside the pixel loop:
// %bx = count of full and partial loops
// %ebx high word = sfrac
// %ecx = pz
// %dx = light
// %edx high word = tfrac
// %esi = ptex
// %edi = pdest
// %ebp = 1/z
// tstep low word = C(r_lstepx)
// tstep high word = C(a_tstepxfrac)
// C(a_sstepxfrac) low word = 0
// C(a_sstepxfrac) high word = C(a_sstepxfrac)
//
// Each of the eight pixel slots below has the same shape:
//   - compare the low word of %ebp against the stored z; skip the write
//     when the new value is smaller (signed 16-bit compare)
//   - build a table index with the high byte of light in %ah and the
//     texel in %al, store z, and look the pixel up through the absolute
//     address that D_Aff8Patch writes over the 0x12345678 placeholder
//   - step tfrac+light together; sbbl turns the carry into -1 or 0
//   - step 1/z, adding the carry back in because the value is held
//     rotated by 16 bits
//   - step sfrac and advance the texture pointer by the selected
//     advancetable entry plus the sfrac carry

LDrawLoop:

// FIXME: do we need to clamp light? We may need at least a buffer bit to
// keep it from poking into tfrac and causing problems

LDraw8:
	cmpw	(%ecx),%bp
	jl		Lp1
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,(%ecx)
	movb	0x12345678(%eax),%al
LPatch8:
	movb	%al,(%edi)
Lp1:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw7:
	cmpw	2(%ecx),%bp
	jl		Lp2
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,2(%ecx)
	movb	0x12345678(%eax),%al
LPatch7:
	movb	%al,1(%edi)
Lp2:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw6:
	cmpw	4(%ecx),%bp
	jl		Lp3
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,4(%ecx)
	movb	0x12345678(%eax),%al
LPatch6:
	movb	%al,2(%edi)
Lp3:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw5:
	cmpw	6(%ecx),%bp
	jl		Lp4
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,6(%ecx)
	movb	0x12345678(%eax),%al
LPatch5:
	movb	%al,3(%edi)
Lp4:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw4:
	cmpw	8(%ecx),%bp
	jl		Lp5
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,8(%ecx)
	movb	0x12345678(%eax),%al
LPatch4:
	movb	%al,4(%edi)
Lp5:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw3:
	cmpw	10(%ecx),%bp
	jl		Lp6
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,10(%ecx)
	movb	0x12345678(%eax),%al
LPatch3:
	movb	%al,5(%edi)
Lp6:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw2:
	cmpw	12(%ecx),%bp
	jl		Lp7
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,12(%ecx)
	movb	0x12345678(%eax),%al
LPatch2:
	movb	%al,6(%edi)
Lp7:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw1:
	cmpw	14(%ecx),%bp
	jl		Lp8
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,14(%ecx)
	movb	0x12345678(%eax),%al
LPatch1:
	movb	%al,7(%edi)
Lp8:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

	addl	$8,%edi				// eight destination pixels per pass
	addl	$16,%ecx			// eight 16-bit z entries per pass

	decw	%bx
	jnz		LDrawLoop

	popl	%esi				// restore spans pointer
LNextSpan:
	addl	$(spanpackage_t_size),%esi	// point to next span
LNextSpanESISet:
	movl	spanpackage_t_count(%esi),%edx
	cmpl	$-999999,%edx		// a count of -999999 marks the end of the list
	jnz		LSpanLoop			// more spans to draw

	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	popl	%ebx				// restore register variables
	popl	%esi
	ret


// draw a one-long span
//
// Same z test and colormap lookup as one slot of the unrolled loop, taken
// straight from the span fields with no stepping.  %esi is advanced to
// the next span before rejoining the main loop at LNextSpanESISet.

LExactlyOneLong:

	movl	spanpackage_t_pz(%esi),%ecx
	movl	spanpackage_t_zi(%esi),%ebp

	rorl	$16,%ebp			// put high 16 bits of 1/z in low word
	movl	spanpackage_t_ptex(%esi),%ebx

	cmpw	(%ecx),%bp
	jl		LNextSpan
	xorl	%eax,%eax
	movl	spanpackage_t_pdest(%esi),%edi
	movb	spanpackage_t_light+1(%esi),%ah	// second byte of the light field
	addl	$(spanpackage_t_size),%esi	// point to next span
	movb	(%ebx),%al
	movw	%bp,(%ecx)
	movb	0x12345678(%eax),%al
LPatch9:
	movb	%al,(%edi)

	jmp		LNextSpanESISet

.globl C(D_PolysetAff8End)
C(D_PolysetAff8End):


.extern C(alias_colormap)
// #define pcolormap		4

// Stores the current value of C(alias_colormap) into the four bytes that
// immediately precede each LPatchN label, i.e. over the 0x12345678
// displacement of the table-lookup instruction just before the label.
// Takes no stack arguments; uses %eax only.

.globl C(D_Aff8Patch)
C(D_Aff8Patch):
	movl	C(alias_colormap),%eax
	movl	%eax,LPatch1-4
	movl	%eax,LPatch2-4
	movl	%eax,LPatch3-4
	movl	%eax,LPatch4-4
	movl	%eax,LPatch5-4
	movl	%eax,LPatch6-4
	movl	%eax,LPatch7-4
	movl	%eax,LPatch8-4
	movl	%eax,LPatch9-4

	ret

//----------------------------------------------------------------------
// Alias model triangle left-edge scanning code
//----------------------------------------------------------------------

// Argument offset: 4 bytes of return address plus the four registers
// pushed in the prologue.
#define height	4+16

.globl C(R_PolysetScanLeftEdge)
C(R_PolysetScanLeftEdge):
	pushl	%ebp				// preserve caller stack frame pointer
	pushl	%esi				// preserve register variables
	pushl	%edi
	pushl	%ebx

	movl	height(%esp),%eax
	movl	C(d_sfrac),%ecx
	andl	$0xFFFF,%eax		// only the low 16 bits of height are used
	movl	C(d_ptex),%ebx
	orl		%eax,%ecx			// merge the count into the low word of %ecx
	movl	C(d_pedgespanpackage),%esi
	movl	C(d_tfrac),%edx
	movl	C(d_light),%edi
	movl	C(d_zi),%ebp

// %eax: scratch
// %ebx: d_ptex
// %ecx: d_sfrac in high word, count in low word
// %edx: d_tfrac
// %esi: d_pedgespanpackage, errorterm, scratch alternately
// %edi: d_light
// %ebp: d_zi
//
// The register copies of d_ptex, d_sfrac, d_tfrac, d_light and d_zi are
// not stored back to their variables anywhere in this routine.

//	do
//	{

LScanLoop:

// Record the current edge state in the span package at %esi:
//		d_pedgespanpackage->ptex = ptex;
//		d_pedgespanpackage->pdest = d_pdest;
//		d_pedgespanpackage->pz = d_pz;
//		d_pedgespanpackage->count = d_aspancount;
//		d_pedgespanpackage->light = d_light;
//		d_pedgespanpackage->zi = d_zi;
//		d_pedgespanpackage->sfrac = d_sfrac << 16;
//		d_pedgespanpackage->tfrac = d_tfrac << 16;
	movl	%ebx,spanpackage_t_ptex(%esi)
	movl	C(d_pdest),%eax
	movl	%eax,spanpackage_t_pdest(%esi)
	movl	C(d_pz),%eax
	movl	%eax,spanpackage_t_pz(%esi)
	movl	C(d_aspancount),%eax
	movl	%eax,spanpackage_t_count(%esi)
	movl	%edi,spanpackage_t_light(%esi)
	movl	%ebp,spanpackage_t_zi(%esi)
	movl	%ecx,spanpackage_t_sfrac(%esi)
	movl	%edx,spanpackage_t_tfrac(%esi)

// pretouch the next cache line (the loaded byte itself is discarded)
	movb	spanpackage_t_size(%esi),%al

//		d_pedgespanpackage++;
	addl	$(spanpackage_t_size),%esi
	movl	C(erroradjustup),%eax
	movl	%esi,C(d_pedgespanpackage)

//		errorterm += erroradjustup;
	movl	C(errorterm),%esi
	addl	%eax,%esi
	movl	C(d_pdest),%eax		// movl keeps the sign flag from the addl

//		if (errorterm >= 0)
//		{
	js		LNoLeftEdgeTurnover

//			errorterm -= erroradjustdown;
//			d_pdest += d_pdestextrastep;
	subl	C(erroradjustdown),%esi
	addl	C(d_pdestextrastep),%eax
	movl	%esi,C(errorterm)
	movl	%eax,C(d_pdest)

//			d_pz += d_pzextrastep;
//			d_aspancount += d_countextrastep;
//			d_ptex += d_ptexextrastep;
//			d_sfrac += d_sfracextrastep;
//			d_ptex += d_sfrac >> 16;
//			d_sfrac &= 0xFFFF;
//			d_tfrac += d_tfracextrastep;
	movl	C(d_pz),%eax
	movl	C(d_aspancount),%esi
	addl	C(d_pzextrastep),%eax
	addl	C(d_sfracextrastep),%ecx
	adcl	C(d_ptexextrastep),%ebx	// the sfrac carry moves ptex one extra texel
	addl	C(d_countextrastep),%esi
	movl	%eax,C(d_pz)
	movl	C(d_tfracextrastep),%eax
	movl	%esi,C(d_aspancount)
	addl	%eax,%edx

//			if (d_tfrac & 0x10000)
//			{
	jnc		LSkip1

//				d_ptex += r_affinetridesc.skinwidth;
//				d_tfrac &= 0xFFFF;
	addl	C(r_affinetridesc)+atd_skinwidth,%ebx

//			}

LSkip1:

//			d_light += d_lightextrastep;
//			d_zi += d_ziextrastep;
	addl	C(d_lightextrastep),%edi
	addl	C(d_ziextrastep),%ebp

//		}
// Loop test for this branch: count down the low word of %ecx.
	movl	C(d_pedgespanpackage),%esi
	decl	%ecx
	testl	$0xFFFF,%ecx
	jnz		LScanLoop

	popl	%ebx
	popl	%edi
	popl	%esi
	popl	%ebp
	ret

//		else
//		{

LNoLeftEdgeTurnover:
	movl	%esi,C(errorterm)

//			d_pdest += d_pdestbasestep;
	addl	C(d_pdestbasestep),%eax
	movl	%eax,C(d_pdest)

//			d_pz += d_pzbasestep;
//			d_aspancount += ubasestep;
//			d_ptex += d_ptexbasestep;
//			d_sfrac += d_sfracbasestep;
//			d_ptex += d_sfrac >> 16;
//			d_sfrac &= 0xFFFF;
	movl	C(d_pz),%eax
	movl	C(d_aspancount),%esi
	addl	C(d_pzbasestep),%eax
	addl	C(d_sfracbasestep),%ecx
	adcl	C(d_ptexbasestep),%ebx	// the sfrac carry moves ptex one extra texel
	addl	C(ubasestep),%esi
	movl	%eax,C(d_pz)
	movl	%esi,C(d_aspancount)

//			d_tfrac += d_tfracbasestep;
	movl	C(d_tfracbasestep),%esi
	addl	%esi,%edx

//			if (d_tfrac & 0x10000)
//			{
	jnc		LSkip2

//				d_ptex += r_affinetridesc.skinwidth;
//				d_tfrac &= 0xFFFF;
	addl	C(r_affinetridesc)+atd_skinwidth,%ebx

//			}

LSkip2:

//			d_light += d_lightbasestep;
//			d_zi += d_zibasestep;
	addl	C(d_lightbasestep),%edi
	addl	C(d_zibasestep),%ebp

//		}
//	} while (--height);
	movl	C(d_pedgespanpackage),%esi
	decl	%ecx
	testl	$0xFFFF,%ecx
	jnz		LScanLoop

	popl	%ebx
	popl	%edi
	popl	%esi
	popl	%ebp
	ret

#endif	// id386