1 /*
2  * Copyright 1999, 2000 ATI Technologies Inc., Markham, Ontario,
3  *                      Precision Insight, Inc., Cedar Park, Texas, and
4  *                      VA Linux Systems Inc., Fremont, California.
5  *
6  * All Rights Reserved.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining
9  * a copy of this software and associated documentation files (the
10  * "Software"), to deal in the Software without restriction, including
11  * without limitation on the rights to use, copy, modify, merge,
12  * publish, distribute, sublicense, and/or sell copies of the Software,
13  * and to permit persons to whom the Software is furnished to do so,
14  * subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice (including the
17  * next paragraph) shall be included in all copies or substantial
18  * portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23  * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, PRECISION INSIGHT, VA LINUX
24  * SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
25  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
26  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27  * OTHER DEALINGS IN THE SOFTWARE.
28  */
29 
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 /*
35  * Authors:
36  *   Rickard E. Faith <faith@valinux.com>
37  *   Kevin E. Martin <martin@valinux.com>
38  *   Alan Hourihane <alanh@fairlite.demon.co.uk>
39  *
40  * Credits:
41  *
 *   Thanks to Alan Hourihane <alanh@fairlite.demon.co.uk> and SuSE for
43  *   providing source code to their 3.3.x Rage 128 driver.  Portions of
44  *   this file are based on the acceleration code for that driver.
45  *
46  * References:
47  *
48  *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49  *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50  *   1999.
51  *
52  *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53  *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54  *
55  * Notes on unimplemented XAA optimizations:
56  *
57  *   SetClipping:   The Rage128 doesn't support the full 16bit registers needed
58  *                  for XAA clip rect support.
59  *   SolidFillTrap: This will probably work if we can compute the correct
60  *                  Bresenham error values.
61  *   TwoPointLine:  The Rage 128 supports Bresenham lines instead.
62  *   DashedLine with non-power-of-two pattern length: Apparently, there is
63  *                  no way to set the length of the pattern -- it is always
64  *                  assumed to be 8 or 32 (or 1024?).
65  *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
66  *                  Manual where it states that monochrome expansion of frame
67  *                  buffer data is not supported.
68  *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
69  *                  direct/indirect method.  If we had more data registers,
70  *                  then we could do better.  If XAA supported a trigger write
71  *                  address, the code would be simpler.
72  * (Alan Hourihane) Update. We now use purely indirect and clip the full
73  *                  rectangle. Seems as the direct method has some problems
74  *                  with this, although this indirect method is much faster
75  *                  than the old method of setting up the engine per scanline.
76  *                  This code was the basis of the Radeon work we did.
77  *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
78  *                  pattern from frame buffer memory.
79  *   ImageWrites:   See CPUToScreenColorExpandFill.
80  *
81  */
82 
83 #define R128_TRAPEZOIDS 0       /* Trapezoids don't work               */
84 
85 				/* Driver data structures */
86 #include <errno.h>
87 
88 #include "r128.h"
89 #include "r128_reg.h"
90 #include "r128_probe.h"
91 #ifdef R128DRI
92 #include "r128_sarea.h"
93 #define _XF86DRI_SERVER_
94 #include "r128_dri.h"
95 #include "r128_common.h"
96 #endif
97 
98 				/* Line support */
99 #include "miline.h"
100 
101 				/* X and server generic header files */
102 #include "xf86.h"
103 
104 #ifdef HAVE_XAA_H
105 #include "r128_rop.h"
106 #endif
107 
108 extern int getR128EntityIndex(void);
109 
110 /* Flush all dirty data in the Pixel Cache to memory. */
R128EngineFlush(ScrnInfoPtr pScrn)111 void R128EngineFlush(ScrnInfoPtr pScrn)
112 {
113     R128InfoPtr   info      = R128PTR(pScrn);
114     unsigned char *R128MMIO = info->MMIO;
115     int           i;
116 
117     OUTREGP(R128_PC_NGUI_CTLSTAT, R128_PC_FLUSH_ALL, ~R128_PC_FLUSH_ALL);
118     for (i = 0; i < R128_TIMEOUT; i++) {
119 	if (!(INREG(R128_PC_NGUI_CTLSTAT) & R128_PC_BUSY)) break;
120     }
121 }
122 
123 /* Reset graphics card to known state. */
R128EngineReset(ScrnInfoPtr pScrn)124 void R128EngineReset(ScrnInfoPtr pScrn)
125 {
126     R128InfoPtr   info      = R128PTR(pScrn);
127     unsigned char *R128MMIO = info->MMIO;
128     uint32_t      clock_cntl_index;
129     uint32_t      mclk_cntl;
130     uint32_t      gen_reset_cntl;
131 
132     R128EngineFlush(pScrn);
133 
134     clock_cntl_index = INREG(R128_CLOCK_CNTL_INDEX);
135     mclk_cntl        = INPLL(pScrn, R128_MCLK_CNTL);
136 
137     OUTPLL(R128_MCLK_CNTL, mclk_cntl | R128_FORCE_GCP | R128_FORCE_PIPE3D_CP);
138 
139     gen_reset_cntl   = INREG(R128_GEN_RESET_CNTL);
140 
141     OUTREG(R128_GEN_RESET_CNTL, gen_reset_cntl | R128_SOFT_RESET_GUI);
142     INREG(R128_GEN_RESET_CNTL);
143     OUTREG(R128_GEN_RESET_CNTL,
144 	gen_reset_cntl & (uint32_t)(~R128_SOFT_RESET_GUI));
145     INREG(R128_GEN_RESET_CNTL);
146 
147     OUTPLL(R128_MCLK_CNTL,        mclk_cntl);
148     OUTREG(R128_CLOCK_CNTL_INDEX, clock_cntl_index);
149     OUTREG(R128_GEN_RESET_CNTL,   gen_reset_cntl);
150 }
151 
152 /* The FIFO has 64 slots.  This routines waits until at least `entries' of
153    these slots are empty. */
R128WaitForFifoFunction(ScrnInfoPtr pScrn,int entries)154 void R128WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
155 {
156     R128InfoPtr   info      = R128PTR(pScrn);
157     unsigned char *R128MMIO = info->MMIO;
158     int           i;
159 
160     for (;;) {
161 	for (i = 0; i < R128_TIMEOUT; i++) {
162 	    info->fifo_slots = INREG(R128_GUI_STAT) & R128_GUI_FIFOCNT_MASK;
163 	    if (info->fifo_slots >= entries) return;
164 	}
165 
166     DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
167                     "FIFO timed out: %lu entries, "
168                     "stat = 0x%08lx, probe = 0x%08lx\n",
169                     INREG(R128_GUI_STAT) & R128_GUI_FIFOCNT_MASK,
170                     INREG(R128_GUI_STAT),
171                     INREG(R128_GUI_PROBE)));
172 	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
173 		   "FIFO timed out, resetting engine...\n");
174 	R128EngineReset(pScrn);
175 #ifdef R128DRI
176 	R128CCE_RESET(pScrn, info);
177 	if (info->directRenderingEnabled) {
178 	    R128CCE_START(pScrn, info);
179 	}
180 #endif
181     }
182 }
183 
184 /* Wait for the graphics engine to be completely idle: the FIFO has
185    drained, the Pixel Cache is flushed, and the engine is idle.  This is a
186    standard "sync" function that will make the hardware "quiescent". */
R128WaitForIdle(ScrnInfoPtr pScrn)187 void R128WaitForIdle(ScrnInfoPtr pScrn)
188 {
189     R128InfoPtr   info      = R128PTR(pScrn);
190     unsigned char *R128MMIO = info->MMIO;
191     int           i;
192 
193     R128WaitForFifoFunction(pScrn, 64);
194 
195     for (;;) {
196 	for (i = 0; i < R128_TIMEOUT; i++) {
197 	    if (!(INREG(R128_GUI_STAT) & R128_GUI_ACTIVE)) {
198 		R128EngineFlush(pScrn);
199 		return;
200 	    }
201 	}
202 
203     DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
204                         "Idle timed out: %lu entries, "
205                         "stat = 0x%08lx, probe = 0x%08lx\n",
206                         INREG(R128_GUI_STAT) & R128_GUI_FIFOCNT_MASK,
207                         INREG(R128_GUI_STAT),
208                         INREG(R128_GUI_PROBE)));
209 	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
210 		   "Idle timed out, resetting engine...\n");
211 #ifdef R128DRI
212         R128CCE_STOP(pScrn, info);
213 #endif
214 	R128EngineReset(pScrn);
215 #ifdef R128DRI
216 	R128CCE_RESET(pScrn, info);
217 	if (info->directRenderingEnabled) {
218 	    R128CCE_START(pScrn, info);
219 	}
220 #endif
221     }
222 }
223 
224 #ifdef R128DRI
225 /* Wait until the CCE is completely idle: the FIFO has drained and the
226  * CCE is idle.
227  */
R128CCEWaitForIdle(ScrnInfoPtr pScrn)228 void R128CCEWaitForIdle(ScrnInfoPtr pScrn)
229 {
230     R128InfoPtr info = R128PTR(pScrn);
231     int         ret, i;
232 
233     FLUSH_RING();
234 
235     for (;;) {
236         i = 0;
237         do {
238             ret = drmCommandNone(info->drmFD, DRM_R128_CCE_IDLE);
239         } while ( ret && errno == EBUSY && i++ < (R128_IDLE_RETRY * R128_IDLE_RETRY) );
240 
241 	if (ret && ret != -EBUSY) {
242 	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
243 		       "%s: CCE idle %d\n", __FUNCTION__, ret);
244 	}
245 
246 	if (i > R128_IDLE_RETRY) {
247 	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
248 		       "%s: (DEBUG) CCE idle took i = %d\n", __FUNCTION__, i);
249 	}
250 
251 	if (ret == 0) return;
252 
253 	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
254 		   "Idle timed out, resetting engine...\n");
255 	R128CCE_STOP(pScrn, info);
256 	R128EngineReset(pScrn);
257 
258 	/* Always restart the engine when doing CCE 2D acceleration */
259 	R128CCE_RESET(pScrn, info);
260 	R128CCE_START(pScrn, info);
261     }
262 }
263 
/* Stop the CCE through DRM_R128_CCE_STOP, relaxing the request in three
 * stages:
 *   1. one attempt with both `flush' and `idle' set;
 *   2. with `flush' cleared, repeated attempts while the call fails with
 *      errno == EBUSY, up to R128_IDLE_RETRY retries;
 *   3. one last attempt with `idle' cleared as well.
 *
 * Returns 0 as soon as any attempt succeeds.  Returns -errno when an
 * attempt in stage 1 or 2 fails with an error other than EBUSY, or when
 * the stage 3 attempt fails.
 */
int R128CCEStop(ScrnInfoPtr pScrn)
{
    R128InfoPtr    info = R128PTR(pScrn);
    drmR128CCEStop stop;
    int            ret, retries;

    /* Stage 1: flush and wait for idle. */
    stop.flush = 1;
    stop.idle  = 1;

    ret = drmCommandWrite(info->drmFD, DRM_R128_CCE_STOP,
			  &stop, sizeof stop);
    if (ret == 0)
	return 0;
    if (errno != EBUSY)
	return -errno;

    /* Stage 2: no flush, keep retrying while the CCE reports busy. */
    stop.flush = 0;

    retries = 0;
    for (;;) {
	ret = drmCommandWrite(info->drmFD, DRM_R128_CCE_STOP,
			      &stop, sizeof stop);
	if (ret == 0)
	    return 0;
	if (errno != EBUSY)
	    return -errno;
	if (retries++ >= R128_IDLE_RETRY)
	    break;
    }

    /* Stage 3: neither flush nor idle. */
    stop.idle = 0;

    return drmCommandWrite(info->drmFD, DRM_R128_CCE_STOP,
			   &stop, sizeof stop) ? -errno : 0;
}
305 
306 #endif
307 
308 #ifdef HAVE_XAA_H
309 /* Setup for XAA SolidFill. */
R128SetupForSolidFill(ScrnInfoPtr pScrn,int color,int rop,unsigned int planemask)310 static void R128SetupForSolidFill(ScrnInfoPtr pScrn,
311 				  int color, int rop, unsigned int planemask)
312 {
313     R128InfoPtr   info      = R128PTR(pScrn);
314     unsigned char *R128MMIO = info->MMIO;
315 
316     R128WaitForFifo(pScrn, 4);
317     OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
318 				     | R128_GMC_BRUSH_SOLID_COLOR
319 				     | R128_GMC_SRC_DATATYPE_COLOR
320 				     | R128_ROP[rop].pattern));
321     OUTREG(R128_DP_BRUSH_FRGD_CLR,  color);
322     OUTREG(R128_DP_WRITE_MASK,      planemask);
323     OUTREG(R128_DP_CNTL,            (R128_DST_X_LEFT_TO_RIGHT
324 				     | R128_DST_Y_TOP_TO_BOTTOM));
325 }
326 
327 /* Subsequent XAA SolidFillRect.
328 
329    Tests: xtest CH06/fllrctngl, xterm
330 */
R128SubsequentSolidFillRect(ScrnInfoPtr pScrn,int x,int y,int w,int h)331 static void  R128SubsequentSolidFillRect(ScrnInfoPtr pScrn,
332 					 int x, int y, int w, int h)
333 {
334     R128InfoPtr   info      = R128PTR(pScrn);
335     unsigned char *R128MMIO = info->MMIO;
336 
337     R128WaitForFifo(pScrn, 2);
338     OUTREG(R128_DST_Y_X,          (y << 16) | x);
339     OUTREG(R128_DST_WIDTH_HEIGHT, (w << 16) | h);
340 }
341 
342 /* Setup for XAA solid lines. */
R128SetupForSolidLine(ScrnInfoPtr pScrn,int color,int rop,unsigned int planemask)343 static void R128SetupForSolidLine(ScrnInfoPtr pScrn,
344 				  int color, int rop, unsigned int planemask)
345 {
346     R128InfoPtr   info      = R128PTR(pScrn);
347     unsigned char *R128MMIO = info->MMIO;
348 
349     R128WaitForFifo(pScrn, 3);
350     OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
351 				     | R128_GMC_BRUSH_SOLID_COLOR
352 				     | R128_GMC_SRC_DATATYPE_COLOR
353 				     | R128_ROP[rop].pattern));
354     OUTREG(R128_DP_BRUSH_FRGD_CLR,  color);
355     OUTREG(R128_DP_WRITE_MASK,      planemask);
356 }
357 
358 
359 /* Subsequent XAA solid Bresenham line.
360 
361    Tests: xtest CH06/drwln, ico, Mark Vojkovich's linetest program
362 
363    [See http://www.xfree86.org/devel/archives/devel/1999-Jun/0102.shtml for
364    Mark Vojkovich's linetest program, posted 2Jun99 to devel@xfree86.org.]
365 
366    x11perf -line500
367                                1024x768@76Hz   1024x768@76Hz
368                                         8bpp           32bpp
369    not used:                     39700.0/sec     34100.0/sec
370    used:                         47600.0/sec     36800.0/sec
371 */
static void R128SubsequentSolidBresenhamLine(ScrnInfoPtr pScrn,
					     int x, int y,
					     int major, int minor,
					     int err, int len, int octant)
{
    R128InfoPtr   info      = R128PTR(pScrn);
    unsigned char *R128MMIO = info->MMIO;
    int           direction = 0;

    /* Translate the octant bits into the engine's direction bits:
       Y-major is passed through, while the X/Y direction bits are set
       when the corresponding "decreasing" octant bit is clear. */
    if (octant & YMAJOR) {
	direction |= R128_DST_Y_MAJOR;
    }
    if ((octant & XDECREASING) == 0) {
	direction |= R128_DST_X_DIR_LEFT_TO_RIGHT;
    }
    if ((octant & YDECREASING) == 0) {
	direction |= R128_DST_Y_DIR_TOP_TO_BOTTOM;
    }

    /* Six register writes follow. */
    R128WaitForFifo(pScrn, 6);

    OUTREG(R128_DP_CNTL_XDIR_YDIR_YMAJOR, direction);
    OUTREG(R128_DST_Y_X, (y << 16) | x);
    OUTREG(R128_DST_BRES_ERR, err);
    OUTREG(R128_DST_BRES_INC, minor);
    OUTREG(R128_DST_BRES_DEC, -major);	/* decrement is written negated */
    OUTREG(R128_DST_BRES_LNTH, len);
}
393 
394 /* Subsequent XAA solid horizontal and vertical lines
395 
396    1024x768@76Hz 8bpp
397                              Without             With
398    x11perf -hseg500      87600.0/sec     798000.0/sec
399    x11perf -vseg500      38100.0/sec      38000.0/sec
400 */
static void R128SubsequentSolidHorVertLine(ScrnInfoPtr pScrn,
					   int x, int y, int len, int dir )
{
    R128InfoPtr   info       = R128PTR(pScrn);
    unsigned char *R128MMIO  = info->MMIO;
    /* dir == DEGREES_0 is drawn as a len x 1 rectangle, any other
       direction as a 1 x len rectangle. */
    const int     horizontal = (dir == DEGREES_0);

    /* Set the drawing direction (one register write); the line itself is
       then drawn as a one-pixel-thick solid rectangle. */
    R128WaitForFifo(pScrn, 1);
    OUTREG(R128_DP_CNTL,
	   (R128_DST_X_LEFT_TO_RIGHT | R128_DST_Y_TOP_TO_BOTTOM));

    R128SubsequentSolidFillRect(pScrn, x, y,
				horizontal ? len : 1,
				horizontal ? 1 : len);
}
417 
418 /* Setup for XAA dashed lines.
419 
420    Tests: xtest CH05/stdshs, XFree86/drwln
421 
422    NOTE: Since we can only accelerate lines with power-of-2 patterns of
423    length <= 32, these x11perf numbers are not representative of the
424    speed-up on appropriately-sized patterns.
425 
426    1024x768@76Hz 8bpp
427                              Without             With
428    x11perf -dseg100     218000.0/sec     222000.0/sec
429    x11perf -dline100    215000.0/sec     221000.0/sec
430    x11perf -ddline100   178000.0/sec     180000.0/sec
431 */
static void R128SetupForDashedLine(ScrnInfoPtr pScrn,
				   int fg, int bg,
				   int rop, unsigned int planemask,
				   int length, unsigned char *pattern)
{
    R128InfoPtr   info      = R128PTR(pScrn);
    unsigned char *R128MMIO = info->MMIO;
    uint32_t      pat;

    /* Load the first four pattern bytes as one 32-bit word in host byte
       order.  The bytes are assembled explicitly instead of dereferencing
       `pattern' through a uint32_t pointer, so the load does not depend on
       the buffer's alignment or effective type.  As before, four bytes are
       read whatever `length' is.

       PAT_SHIFT moves the pattern towards the bits that are still empty:
       left on little-endian hosts, right on big-endian ones.  The macro is
       not #undef'd, so it stays defined for the rest of the file. */
#if X_BYTE_ORDER == X_LITTLE_ENDIAN
# define PAT_SHIFT(pat,n) ((pat) << (n))
    pat = ((uint32_t)pattern[0])
	| ((uint32_t)pattern[1] << 8)
	| ((uint32_t)pattern[2] << 16)
	| ((uint32_t)pattern[3] << 24);
#else
# define PAT_SHIFT(pat,n) ((pat) >> (n))
    pat = ((uint32_t)pattern[0] << 24)
	| ((uint32_t)pattern[1] << 16)
	| ((uint32_t)pattern[2] << 8)
	| ((uint32_t)pattern[3]);
#endif

    /* Replicate a 2-, 4-, 8- or 16-bit pattern until it fills 32 bits;
       each case doubles the pattern and falls into the next one. */
    switch (length) {
    case  2: pat |= PAT_SHIFT(pat,2);  /* fall through */
    case  4: pat |= PAT_SHIFT(pat,4);  /* fall through */
    case  8: pat |= PAT_SHIFT(pat,8);  /* fall through */
    case 16: pat |= PAT_SHIFT(pat,16);
	break;
    default:
	/* Any other length: the word is used as loaded. */
	break;
    }

    /* Five register writes follow.  bg == -1 selects the
       R128_GMC_BRUSH_32x1_MONO_FG_LA brush instead of the FG_BG one. */
    R128WaitForFifo(pScrn, 5);
    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
				     | (bg == -1
					? R128_GMC_BRUSH_32x1_MONO_FG_LA
					: R128_GMC_BRUSH_32x1_MONO_FG_BG)
				     | R128_ROP[rop].pattern
				     | R128_GMC_BYTE_LSB_TO_MSB));
    OUTREG(R128_DP_WRITE_MASK,      planemask);
    OUTREG(R128_DP_BRUSH_FRGD_CLR,  fg);
    OUTREG(R128_DP_BRUSH_BKGD_CLR,  bg);
    OUTREG(R128_BRUSH_DATA0,        pat);
}
466 
467 /* Subsequent XAA dashed line. */
R128SubsequentDashedBresenhamLine(ScrnInfoPtr pScrn,int x,int y,int major,int minor,int err,int len,int octant,int phase)468 static void R128SubsequentDashedBresenhamLine(ScrnInfoPtr pScrn,
469 					      int x, int y,
470 					      int major, int minor,
471 					      int err, int len, int octant,
472 					      int phase)
473 {
474     R128InfoPtr   info      = R128PTR(pScrn);
475     unsigned char *R128MMIO = info->MMIO;
476     int           flags     = 0;
477 
478     if (octant & YMAJOR)         flags |= R128_DST_Y_MAJOR;
479     if (!(octant & XDECREASING)) flags |= R128_DST_X_DIR_LEFT_TO_RIGHT;
480     if (!(octant & YDECREASING)) flags |= R128_DST_Y_DIR_TOP_TO_BOTTOM;
481 
482     R128WaitForFifo(pScrn, 7);
483     OUTREG(R128_DP_CNTL_XDIR_YDIR_YMAJOR, flags);
484     OUTREG(R128_DST_Y_X,                  (y << 16) | x);
485     OUTREG(R128_BRUSH_Y_X,                (phase << 16) | phase);
486     OUTREG(R128_DST_BRES_ERR,             err);
487     OUTREG(R128_DST_BRES_INC,             minor);
488     OUTREG(R128_DST_BRES_DEC,             -major);
489     OUTREG(R128_DST_BRES_LNTH,            len);
490 }
491 
#if R128_TRAPEZOIDS
				/* This doesn't work.  Except in the
				   lower-left quadrant, all of the pixel
				   errors appear to be because eL and eR
				   are not correct.  Drawing from right to
				   left doesn't help.  Be aware that the
				   non-_SUB registers set the sub-pixel
				   values to 0.5 (0x08), which isn't what
				   XAA wants. */
/* Subsequent XAA SolidFillTrap.  XAA always passes data that assumes we
   fill from top to bottom, so dyL and dyR are always non-negative.

   y, h:                 first scanline and height of the trapezoid.
   left, dxL, dyL, eL:   start X and Bresenham terms of the left edge.
   right, dxR, dyR, eR:  start X and Bresenham terms of the right edge.

   Only compiled when R128_TRAPEZOIDS is non-zero. */
static void R128SubsequentSolidFillTrap(ScrnInfoPtr pScrn, int y, int h,
					int left, int dxL, int dyL, int eL,
					int right, int dxR, int dyR, int eR)
{
    R128InfoPtr   info      = R128PTR(pScrn);
    unsigned char *R128MMIO = info->MMIO;
    int           flags     = 0;

    DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
                        "Trap %d %d; "
                        "L %d %d %d %d; "
                        "R %d %d %d %d\n",
                        y, h,
                        left, dxL, dyL, eL,
                        right, dxR, dyR, eR));

    /* Use the magnitude of each edge's dx; a non-negative dx is recorded
       as a direction bit instead (bit 0 for the left edge, bit 6 for the
       right edge). */
    if (dxL < 0)    dxL = -dxL; else flags |= (1 << 0) /* | (1 << 8) */;
    if (dxR < 0)    dxR = -dxR; else flags |= (1 << 6);

    /* Eleven register writes follow in either variant. */
    R128WaitForFifo(pScrn, 11);

#if 1
    /* Left edge is the leading edge, right edge the trailing edge. */
    OUTREG(R128_DP_CNTL,            flags | (1 << 1) | (1 << 7));
    OUTREG(R128_DST_Y_SUB,          ((y) << 4) | 0x0 );
    OUTREG(R128_DST_X_SUB,          ((left) << 4)|0x0);
    OUTREG(R128_TRAIL_BRES_ERR,     eR-dxR);
    OUTREG(R128_TRAIL_BRES_INC,     dxR);
    OUTREG(R128_TRAIL_BRES_DEC,     -dyR);
    OUTREG(R128_TRAIL_X_SUB,        ((right) << 4) | 0x0);
    OUTREG(R128_LEAD_BRES_ERR,      eL-dxL);
    OUTREG(R128_LEAD_BRES_INC,      dxL);
    OUTREG(R128_LEAD_BRES_DEC,      -dyL);
    OUTREG(R128_LEAD_BRES_LNTH_SUB, ((h) << 4) | 0x00);
#else
    /* Alternative attempt with the roles of the two edges swapped. */
    OUTREG(R128_DP_CNTL,            flags | (1 << 1) );
    OUTREG(R128_DST_Y_SUB,          (y << 4));
    OUTREG(R128_DST_X_SUB,          (right << 4));
    OUTREG(R128_TRAIL_BRES_ERR,     eL);
    OUTREG(R128_TRAIL_BRES_INC,     dxL);
    OUTREG(R128_TRAIL_BRES_DEC,     -dyL);
    OUTREG(R128_TRAIL_X_SUB,        (left << 4) | 0);
    OUTREG(R128_LEAD_BRES_ERR,      eR);
    OUTREG(R128_LEAD_BRES_INC,      dxR);
    OUTREG(R128_LEAD_BRES_DEC,      -dyR);
    OUTREG(R128_LEAD_BRES_LNTH_SUB, h << 4);
#endif
}
#endif
555 
556 /* Setup for XAA screen-to-screen copy.
557 
558    Tests: xtest CH06/fllrctngl (also tests transparency).
559 */
static void R128SetupForScreenToScreenCopy(ScrnInfoPtr pScrn,
					   int xdir, int ydir, int rop,
					   unsigned int planemask,
					   int trans_color)
{
    R128InfoPtr   info      = R128PTR(pScrn);
    unsigned char *R128MMIO = info->MMIO;

    /* Remember the blit direction; the per-rectangle routine reads
       info->xdir / info->ydir to adjust its start coordinates. */
    info->xdir = xdir;
    info->ydir = ydir;

    /* Three register writes follow. */
    R128WaitForFifo(pScrn, 3);
    OUTREG(R128_DP_GUI_MASTER_CNTL,
	   (info->dp_gui_master_cntl
	    | R128_GMC_BRUSH_SOLID_COLOR
	    | R128_GMC_SRC_DATATYPE_COLOR
	    | R128_ROP[rop].rop
	    | R128_DP_SRC_SOURCE_MEMORY));
    OUTREG(R128_DP_WRITE_MASK, planemask);
    OUTREG(R128_DP_CNTL,
	   ((xdir >= 0 ? R128_DST_X_LEFT_TO_RIGHT : 0)
	    | (ydir >= 0 ? R128_DST_Y_TOP_TO_BOTTOM : 0)));

    /* trans_color == -1 means no transparency: nothing more to set up. */
    if (trans_color == -1) {
	return;
    }

    /* Set up for transparency (three more register writes). */
    R128WaitForFifo(pScrn, 3);
    OUTREG(R128_CLR_CMP_CLR_SRC, trans_color);
    OUTREG(R128_CLR_CMP_MASK, R128_CLR_CMP_MSK);
    OUTREG(R128_CLR_CMP_CNTL,
	   (R128_SRC_CMP_NEQ_COLOR | R128_CLR_CMP_SRC_SOURCE));
}
591 
592 /* Subsequent XAA screen-to-screen copy. */
R128SubsequentScreenToScreenCopy(ScrnInfoPtr pScrn,int xa,int ya,int xb,int yb,int w,int h)593 static void R128SubsequentScreenToScreenCopy(ScrnInfoPtr pScrn,
594 					     int xa, int ya,
595 					     int xb, int yb,
596 					     int w, int h)
597 {
598     R128InfoPtr   info      = R128PTR(pScrn);
599     unsigned char *R128MMIO = info->MMIO;
600 
601     if (info->xdir < 0) xa += w - 1, xb += w - 1;
602     if (info->ydir < 0) ya += h - 1, yb += h - 1;
603 
604     R128WaitForFifo(pScrn, 3);
605     OUTREG(R128_SRC_Y_X,          (ya << 16) | xa);
606     OUTREG(R128_DST_Y_X,          (yb << 16) | xb);
607     OUTREG(R128_DST_HEIGHT_WIDTH, (h << 16) | w);
608 }
609 
610 /* Setup for XAA mono 8x8 pattern color expansion.  Patterns with
611    transparency use `bg == -1'.  This routine is only used if the XAA
612    pixmap cache is turned on.
613 
614    Tests: xtest XFree86/fllrctngl (no other test will test this routine with
615                                    both transparency and non-transparency)
616 
617    1024x768@76Hz 8bpp
618                              Without             With
619    x11perf -srect100     38600.0/sec      85700.0/sec
620    x11perf -osrect100    38600.0/sec      85700.0/sec
621 */
static void R128SetupForMono8x8PatternFill(ScrnInfoPtr pScrn,
					   int patternx, int patterny,
					   int fg, int bg, int rop,
					   unsigned int planemask)
{
    R128InfoPtr   info      = R128PTR(pScrn);
    unsigned char *R128MMIO = info->MMIO;

    /* Six register writes follow. */
    R128WaitForFifo(pScrn, 6);

    /* bg == -1 selects the R128_GMC_BRUSH_8X8_MONO_FG_LA brush instead of
       the FG_BG one. */
    OUTREG(R128_DP_GUI_MASTER_CNTL,
	   (info->dp_gui_master_cntl
	    | (bg == -1
	       ? R128_GMC_BRUSH_8X8_MONO_FG_LA
	       : R128_GMC_BRUSH_8X8_MONO_FG_BG)
	    | R128_ROP[rop].pattern
	    | R128_GMC_BYTE_LSB_TO_MSB));
    OUTREG(R128_DP_WRITE_MASK, planemask);
    OUTREG(R128_DP_BRUSH_FRGD_CLR, fg);
    OUTREG(R128_DP_BRUSH_BKGD_CLR, bg);

    /* Here patternx / patterny carry the pattern bits themselves and are
       loaded into the two brush data registers. */
    OUTREG(R128_BRUSH_DATA0, patternx);
    OUTREG(R128_BRUSH_DATA1, patterny);
}
643 
644 /* Subsequent XAA 8x8 pattern color expansion.  Because they are used in
645    the setup function, `patternx' and `patterny' are not used here. */
R128SubsequentMono8x8PatternFillRect(ScrnInfoPtr pScrn,int patternx,int patterny,int x,int y,int w,int h)646 static void R128SubsequentMono8x8PatternFillRect(ScrnInfoPtr pScrn,
647 						 int patternx, int patterny,
648 						 int x, int y, int w, int h)
649 {
650     R128InfoPtr   info      = R128PTR(pScrn);
651     unsigned char *R128MMIO = info->MMIO;
652 
653     R128WaitForFifo(pScrn, 3);
654     OUTREG(R128_BRUSH_Y_X,        (patterny << 8) | patternx);
655     OUTREG(R128_DST_Y_X,          (y << 16) | x);
656     OUTREG(R128_DST_HEIGHT_WIDTH, (h << 16) | w);
657 }
658 
659 #if 0
660 /* Setup for XAA color 8x8 pattern fill.
661 
662    Tests: xtest XFree86/fllrctngl (with Mono8x8PatternFill off)
663 */
664 static void R128SetupForColor8x8PatternFill(ScrnInfoPtr pScrn,
665 					    int patx, int paty,
666 					    int rop, unsigned int planemask,
667 					    int trans_color)
668 {
669     R128InfoPtr   info      = R128PTR(pScrn);
670     unsigned char *R128MMIO = info->MMIO;
671 
672     DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
673                         "Color8x8 %d %d %d\n",
674                         trans_color, patx, paty));
675 
676     R128WaitForFifo(pScrn, 2);
677     OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
678 				     | R128_GMC_BRUSH_8x8_COLOR
679 				     | R128_GMC_SRC_DATATYPE_COLOR
680 				     | R128_ROP[rop].rop
681 				     | R128_DP_SRC_SOURCE_MEMORY));
682     OUTREG(R128_DP_WRITE_MASK,      planemask);
683 
684     if (trans_color != -1) {
685 				/* Set up for transparency */
686 	R128WaitForFifo(pScrn, 3);
687 	OUTREG(R128_CLR_CMP_CLR_SRC, trans_color);
688 	OUTREG(R128_CLR_CMP_MASK,    R128_CLR_CMP_MSK);
689 	OUTREG(R128_CLR_CMP_CNTL,    (R128_SRC_CMP_NEQ_COLOR
690 				      | R128_CLR_CMP_SRC_SOURCE));
691     }
692 }
693 
694 /* Subsequent XAA 8x8 pattern color expansion. */
695 static void R128SubsequentColor8x8PatternFillRect( ScrnInfoPtr pScrn,
696 						   int patx, int paty,
697 						   int x, int y, int w, int h)
698 {
699     R128InfoPtr   info      = R128PTR(pScrn);
700     unsigned char *R128MMIO = info->MMIO;
701 
702     DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
703                         "Color8x8 %d,%d %d,%d %d %d\n",
704                         patx, paty, x, y, w, h));
705     R128WaitForFifo(pScrn, 3);
706     OUTREG(R128_SRC_Y_X, (paty << 16) | patx);
707     OUTREG(R128_DST_Y_X, (y << 16) | x);
708     OUTREG(R128_DST_HEIGHT_WIDTH, (h << 16) | w);
709 }
710 #endif
711 
712 /* Setup for XAA indirect CPU-to-screen color expansion (indirect).
713    Because of how the scratch buffer is initialized, this is really a
714    mainstore-to-screen color expansion.  Transparency is supported when `bg
715    == -1'.
716 
717    x11perf -ftext (pure indirect):
718                                1024x768@76Hz   1024x768@76Hz
719                                         8bpp           32bpp
720    not used:                    685000.0/sec    794000.0/sec
721    used:                       1070000.0/sec   1080000.0/sec
722 
723    We could improve this indirect routine by about 10% if the hardware
724    could accept DWORD padded scanlines, or if XAA could provide bit-packed
725    data.  We might also be able to move to a direct routine if there were
726    more HOST_DATA registers.
727 
728    Implementing the hybrid indirect/direct scheme improved performance in a
729    few areas:
730 
731    1024x768@76 8bpp
732                                    Indirect          Hybrid
733    x11perf -oddsrect10          50100.0/sec     71700.0/sec
734    x11perf -oddsrect100          4240.0/sec      6660.0/sec
735    x11perf -bigsrect10          50300.0/sec     71100.0/sec
736    x11perf -bigsrect100          4190.0/sec      6800.0/sec
737    x11perf -polytext           584000.0/sec    714000.0/sec
738    x11perf -polytext16         154000.0/sec    172000.0/sec
739    x11perf -seg1              1780000.0/sec   1880000.0/sec
740    x11perf -copyplane10         42900.0/sec     58300.0/sec
741    x11perf -copyplane100         4400.0/sec      6710.0/sec
742    x11perf -putimagexy10         5090.0/sec      6670.0/sec
743    x11perf -putimagexy100         424.0/sec       575.0/sec
744 
745    1024x768@76 -depth 24 -fbbpp 32
746                                    Indirect          Hybrid
747    x11perf -oddsrect100          4240.0/sec      6670.0/sec
748    x11perf -bigsrect100          4190.0/sec      6800.0/sec
749    x11perf -polytext           585000.0/sec    719000.0/sec
750    x11perf -seg1              2960000.0/sec   2990000.0/sec
751    x11perf -copyplane100         4400.0/sec      6700.0/sec
752    x11perf -putimagexy100         138.0/sec       191.0/sec
753 
754 */
static void R128SetupForScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
							   int fg, int bg,
							   int rop,
							   unsigned int
							   planemask)
{
    R128InfoPtr   info      = R128PTR(pScrn);
    unsigned char *R128MMIO = info->MMIO;

    /* R128_GMC_BYTE_LSB_TO_MSB is only requested on little-endian hosts;
       on big-endian hosts the bit is left out of the control word. */
#if X_BYTE_ORDER == X_LITTLE_ENDIAN
    const int     lsb_first = 1;
#else	/* X_BYTE_ORDER == X_BIG_ENDIAN */
    const int     lsb_first = 0;
#endif

    /* Four register writes follow. */
    R128WaitForFifo(pScrn, 4);

    /* Monochrome host data, no brush, destination clipping enabled.
       bg == -1 selects the R128_GMC_SRC_DATATYPE_MONO_FG_LA source type
       instead of the FG_BG one. */
    OUTREG(R128_DP_GUI_MASTER_CNTL,
	   (info->dp_gui_master_cntl
	    | R128_GMC_DST_CLIPPING
	    | R128_GMC_BRUSH_NONE
	    | (bg == -1
	       ? R128_GMC_SRC_DATATYPE_MONO_FG_LA
	       : R128_GMC_SRC_DATATYPE_MONO_FG_BG)
	    | R128_ROP[rop].rop
	    | (lsb_first ? R128_GMC_BYTE_LSB_TO_MSB : 0)
	    | R128_DP_SRC_SOURCE_HOST_DATA));
    OUTREG(R128_DP_WRITE_MASK, planemask);
    OUTREG(R128_DP_SRC_FRGD_CLR, fg);
    OUTREG(R128_DP_SRC_BKGD_CLR, bg);
}
789 
790 /* Subsequent XAA indirect CPU-to-screen color expansion.  This is only
791    called once for each rectangle. */
R128SubsequentScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,int x,int y,int w,int h,int skipleft)792 static void R128SubsequentScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
793 							     int x, int y,
794 							     int w, int h,
795 							     int skipleft)
796 {
797     R128InfoPtr   info      = R128PTR(pScrn);
798     unsigned char *R128MMIO = info->MMIO;
799     int x1clip = x+skipleft;
800     int x2clip = x+w;
801 
802     info->scanline_h      = h;
803     info->scanline_words  = (w + 31) >> 5;
804 
805 #if 0
806     /* Seems as though the Rage128's doesn't like blitting directly
807      * as we must be overwriting something too quickly, therefore we
808      * render to the buffer first and then blit */
809     if ((info->scanline_words * h) <= 9) {
810 	/* Turn on direct for less than 9 dword colour expansion */
811 	info->scratch_buffer[0]
812 	    = (unsigned char *)(ADDRREG(R128_HOST_DATA_LAST)
813 				- (info->scanline_words - 1));
814 	info->scanline_direct = 1;
815     } else
816 #endif
817     {
818 	/* Use indirect for anything else */
819 	info->scratch_buffer[0] = info->scratch_save;
820 	info->scanline_direct   = 0;
821     }
822 
823     if (pScrn->bitsPerPixel == 24) {
824 	x1clip *= 3;
825 	x2clip *= 3;
826     }
827 
828     R128WaitForFifo(pScrn, 4 + (info->scanline_direct ?
829 					(info->scanline_words * h) : 0) );
830     OUTREG(R128_SC_TOP_LEFT,     (y << 16)       | (x1clip & 0xffff));
831     OUTREG(R128_SC_BOTTOM_RIGHT, ((y+h-1) << 16) | ((x2clip-1) & 0xffff));
832     OUTREG(R128_DST_Y_X,         (y << 16)       | (x & 0xffff));
833     /* Have to pad the width here and use clipping engine */
834     OUTREG(R128_DST_HEIGHT_WIDTH, (h << 16)      | ((w + 31) & ~31));
835 }
836 
837 /* Subsequent XAA indirect CPU-to-screen color expansion.  This is called
838    once for each scanline. */
R128SubsequentColorExpandScanline(ScrnInfoPtr pScrn,int bufno)839 static void R128SubsequentColorExpandScanline(ScrnInfoPtr pScrn, int bufno)
840 {
841     R128InfoPtr     info      = R128PTR(pScrn);
842     unsigned char   *R128MMIO = info->MMIO;
843     uint32_t        *p        = (pointer)info->scratch_buffer[bufno];
844     int             i;
845     int             left      = info->scanline_words;
846     volatile uint32_t *d;
847 
848     if (info->scanline_direct) return;
849     --info->scanline_h;
850     while (left) {
851         write_mem_barrier();
852 	if (left <= 8) {
853 	  /* Last scanline - finish write to DATA_LAST */
854 	  if (info->scanline_h == 0) {
855 	    R128WaitForFifo(pScrn, left);
856 				/* Unrolling doesn't improve performance */
857 	    for (d = ADDRREG(R128_HOST_DATA_LAST) - (left - 1); left; --left)
858 		*d++ = *p++;
859 	    return;
860 	  } else {
861 	    R128WaitForFifo(pScrn, left);
862 				/* Unrolling doesn't improve performance */
863 	    for (d = ADDRREG(R128_HOST_DATA7) - (left - 1); left; --left)
864 		*d++ = *p++;
865 	  }
866 	} else {
867 	    R128WaitForFifo(pScrn, 8);
868 				/* Unrolling doesn't improve performance */
869 	    for (d = ADDRREG(R128_HOST_DATA0), i = 0; i < 8; i++)
870 		*d++ = *p++;
871 	    left -= 8;
872 	}
873     }
874 }
875 
876 /* Setup for XAA indirect image write.
877 
878    1024x768@76Hz 8bpp
879                              Without             With
880    x11perf -putimage10   37500.0/sec      39300.0/sec
881    x11perf -putimage100   2150.0/sec       1170.0/sec
882    x11perf -putimage500    108.0/sec         49.8/sec
883  */
R128SetupForScanlineImageWrite(ScrnInfoPtr pScrn,int rop,unsigned int planemask,int trans_color,int bpp,int depth)884 static void R128SetupForScanlineImageWrite(ScrnInfoPtr pScrn,
885 					   int rop,
886 					   unsigned int planemask,
887 					   int trans_color,
888 					   int bpp,
889 					   int depth)
890 {
891     R128InfoPtr   info      = R128PTR(pScrn);
892     unsigned char *R128MMIO = info->MMIO;
893 
894     info->scanline_bpp = bpp;
895 
896     R128WaitForFifo(pScrn, 2);
897     OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
898 				     | R128_GMC_DST_CLIPPING
899 				     | R128_GMC_BRUSH_1X8_COLOR
900 				     | R128_GMC_SRC_DATATYPE_COLOR
901 				     | R128_ROP[rop].rop
902 				     | R128_GMC_BYTE_LSB_TO_MSB
903 				     | R128_DP_SRC_SOURCE_HOST_DATA));
904     OUTREG(R128_DP_WRITE_MASK,      planemask);
905 
906     if (trans_color != -1) {
907 				/* Set up for transparency */
908 	R128WaitForFifo(pScrn, 3);
909 	OUTREG(R128_CLR_CMP_CLR_SRC, trans_color);
910 	OUTREG(R128_CLR_CMP_MASK,    R128_CLR_CMP_MSK);
911 	OUTREG(R128_CLR_CMP_CNTL,    (R128_SRC_CMP_NEQ_COLOR
912 				      | R128_CLR_CMP_SRC_SOURCE));
913     }
914 }
915 
916 /* Subsequent XAA indirect image write. This is only called once for each
917    rectangle. */
R128SubsequentScanlineImageWriteRect(ScrnInfoPtr pScrn,int x,int y,int w,int h,int skipleft)918 static void R128SubsequentScanlineImageWriteRect(ScrnInfoPtr pScrn,
919 						 int x, int y,
920 						 int w, int h,
921 						 int skipleft)
922 {
923     R128InfoPtr   info      = R128PTR(pScrn);
924     unsigned char *R128MMIO = info->MMIO;
925     int x1clip = x+skipleft;
926     int x2clip = x+w;
927 
928     int shift = 0; /* 32bpp */
929 
930     if (pScrn->bitsPerPixel == 8) shift = 3;
931     else if (pScrn->bitsPerPixel == 16) shift = 1;
932 
933     info->scanline_h      = h;
934     info->scanline_words  = (w * info->scanline_bpp + 31) >> 5;
935 
936 #if 0
937     /* Seeing as the CPUToScreen doesn't like this, I've done this
938      * here too, as it uses pretty much the same path. */
939     if ((info->scanline_words * h) <= 9) {
940 	/* Turn on direct for less than 9 dword colour expansion */
941 	info->scratch_buffer[0]
942 	    = (unsigned char *)(ADDRREG(R128_HOST_DATA_LAST)
943 				- (info->scanline_words - 1));
944 	info->scanline_direct = 1;
945     } else
946 #endif
947     {
948 	/* Use indirect for anything else */
949 	info->scratch_buffer[0] = info->scratch_save;
950 	info->scanline_direct   = 0;
951     }
952 
953     if (pScrn->bitsPerPixel == 24) {
954 	x1clip *= 3;
955 	x2clip *= 3;
956     }
957 
958     R128WaitForFifo(pScrn, 4 + (info->scanline_direct ?
959 					(info->scanline_words * h) : 0) );
960     OUTREG(R128_SC_TOP_LEFT,      (y << 16)       | (x1clip & 0xffff));
961     OUTREG(R128_SC_BOTTOM_RIGHT,  ((y+h-1) << 16) | ((x2clip-1) & 0xffff));
962     OUTREG(R128_DST_Y_X,          (y << 16)       | (x & 0xffff));
963     /* Have to pad the width here and use clipping engine */
964     OUTREG(R128_DST_HEIGHT_WIDTH, (h << 16)       | ((w + shift) & ~shift));
965 }
966 
967 /* Subsequent XAA indirect iamge write.  This is called once for each
968    scanline. */
R128SubsequentImageWriteScanline(ScrnInfoPtr pScrn,int bufno)969 static void R128SubsequentImageWriteScanline(ScrnInfoPtr pScrn, int bufno)
970 {
971     R128InfoPtr     info      = R128PTR(pScrn);
972     unsigned char   *R128MMIO = info->MMIO;
973     uint32_t        *p        = (pointer)info->scratch_buffer[bufno];
974     int             i;
975     int             left      = info->scanline_words;
976     volatile uint32_t *d;
977 
978     if (info->scanline_direct) return;
979     --info->scanline_h;
980     while (left) {
981         write_mem_barrier();
982 	if (left <= 8) {
983 	  /* Last scanline - finish write to DATA_LAST */
984 	  if (info->scanline_h == 0) {
985 	    R128WaitForFifo(pScrn, left);
986 				/* Unrolling doesn't improve performance */
987 	    for (d = ADDRREG(R128_HOST_DATA_LAST) - (left - 1); left; --left)
988 		*d++ = *p++;
989 	    return;
990 	  } else {
991 	    R128WaitForFifo(pScrn, left);
992 				/* Unrolling doesn't improve performance */
993 	    for (d = ADDRREG(R128_HOST_DATA7) - (left - 1); left; --left)
994 		*d++ = *p++;
995 	  }
996 	} else {
997 	    R128WaitForFifo(pScrn, 8);
998 				/* Unrolling doesn't improve performance */
999 	    for (d = ADDRREG(R128_HOST_DATA0), i = 0; i < 8; i++)
1000 		*d++ = *p++;
1001 	    left -= 8;
1002 	}
1003     }
1004 }
1005 #endif
1006 
1007 /* Initialize the acceleration hardware. */
R128EngineInit(ScrnInfoPtr pScrn)1008 void R128EngineInit(ScrnInfoPtr pScrn)
1009 {
1010     R128InfoPtr   info      = R128PTR(pScrn);
1011     unsigned char *R128MMIO = info->MMIO;
1012 
1013     DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1014                         "EngineInit (%d/%d)\n",
1015                         info->CurrentLayout.pixel_code,
1016                         info->CurrentLayout.bitsPerPixel));
1017 
1018     OUTREG(R128_SCALE_3D_CNTL, 0);
1019     R128EngineReset(pScrn);
1020 
1021     switch (info->CurrentLayout.pixel_code) {
1022     case 8:  info->datatype = 2; break;
1023     case 15: info->datatype = 3; break;
1024     case 16: info->datatype = 4; break;
1025     case 24: info->datatype = 5; break;
1026     case 32: info->datatype = 6; break;
1027     default:
1028     DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1029                         "Unknown depth/bpp = %d/%d (code = %d)\n",
1030                         info->CurrentLayout.depth,
1031                         info->CurrentLayout.bitsPerPixel,
1032                         info->CurrentLayout.pixel_code));
1033     }
1034     info->pitch = (info->CurrentLayout.displayWidth / 8) * (info->CurrentLayout.pixel_bytes == 3 ? 3 : 1);
1035 
1036     DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1037                         "Pitch for acceleration = %d\n", info->pitch));
1038 
1039     R128WaitForFifo(pScrn, 2);
1040     OUTREG(R128_DEFAULT_OFFSET, pScrn->fbOffset);
1041     OUTREG(R128_DEFAULT_PITCH,  info->pitch);
1042 
1043     R128WaitForFifo(pScrn, 4);
1044     OUTREG(R128_AUX_SC_CNTL,             0);
1045     OUTREG(R128_DEFAULT_SC_BOTTOM_RIGHT, (R128_DEFAULT_SC_RIGHT_MAX
1046 					  | R128_DEFAULT_SC_BOTTOM_MAX));
1047     OUTREG(R128_SC_TOP_LEFT,             0);
1048     OUTREG(R128_SC_BOTTOM_RIGHT,         (R128_DEFAULT_SC_RIGHT_MAX
1049 					  | R128_DEFAULT_SC_BOTTOM_MAX));
1050 
1051     info->dp_gui_master_cntl = ((info->datatype << R128_GMC_DST_DATATYPE_SHIFT)
1052 				| R128_GMC_CLR_CMP_CNTL_DIS
1053 				| R128_GMC_AUX_CLIP_DIS);
1054     R128WaitForFifo(pScrn, 1);
1055     OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
1056 				     | R128_GMC_BRUSH_SOLID_COLOR
1057 				     | R128_GMC_SRC_DATATYPE_COLOR));
1058 
1059     R128WaitForFifo(pScrn, 8);
1060     OUTREG(R128_DST_BRES_ERR,      0);
1061     OUTREG(R128_DST_BRES_INC,      0);
1062     OUTREG(R128_DST_BRES_DEC,      0);
1063     OUTREG(R128_DP_BRUSH_FRGD_CLR, 0xffffffff);
1064     OUTREG(R128_DP_BRUSH_BKGD_CLR, 0x00000000);
1065     OUTREG(R128_DP_SRC_FRGD_CLR,   0xffffffff);
1066     OUTREG(R128_DP_SRC_BKGD_CLR,   0x00000000);
1067     OUTREG(R128_DP_WRITE_MASK,     0xffffffff);
1068 
1069     R128WaitForFifo(pScrn, 1);
1070 
1071 #if X_BYTE_ORDER == X_BIG_ENDIAN
1072     /* FIXME: this is a kludge for texture uploads in the 3D driver. Look at
1073      * how the radeon driver handles HOST_DATA_SWAP if you want to implement
1074      * CCE ImageWrite acceleration or anything needing this bit */
1075 #ifdef R128DRI
1076     if (info->directRenderingEnabled)
1077 	OUTREGP(R128_DP_DATATYPE, 0, ~R128_HOST_BIG_ENDIAN_EN);
1078     else
1079 #endif
1080 	OUTREGP(R128_DP_DATATYPE,
1081 		R128_HOST_BIG_ENDIAN_EN, ~R128_HOST_BIG_ENDIAN_EN);
1082 #else /* X_LITTLE_ENDIAN */
1083     OUTREGP(R128_DP_DATATYPE, 0, ~R128_HOST_BIG_ENDIAN_EN);
1084 #endif
1085 
1086 #ifdef R128DRI
1087     info->sc_left         = 0x00000000;
1088     info->sc_right        = R128_DEFAULT_SC_RIGHT_MAX;
1089     info->sc_top          = 0x00000000;
1090     info->sc_bottom       = R128_DEFAULT_SC_BOTTOM_MAX;
1091 
1092     info->re_top_left     = 0x00000000;
1093     info->re_width_height = ((0x7ff << R128_RE_WIDTH_SHIFT) |
1094 			     (0x7ff << R128_RE_HEIGHT_SHIFT));
1095 
1096     info->aux_sc_cntl     = 0x00000000;
1097 #endif
1098 
1099     R128WaitForIdle(pScrn);
1100 }
1101 
1102 #ifdef R128DRI
1103 
1104 #ifdef HAVE_XAA_H
1105 
1106 /* Setup for XAA SolidFill. */
R128CCESetupForSolidFill(ScrnInfoPtr pScrn,int color,int rop,unsigned int planemask)1107 static void R128CCESetupForSolidFill(ScrnInfoPtr pScrn,
1108 				     int color, int rop,
1109 				     unsigned int planemask)
1110 {
1111     R128InfoPtr   info = R128PTR(pScrn);
1112     RING_LOCALS;
1113 
1114     R128CCE_REFRESH( pScrn, info );
1115 
1116     BEGIN_RING( 8 );
1117 
1118     OUT_RING_REG( R128_DP_GUI_MASTER_CNTL,
1119 		  (info->dp_gui_master_cntl
1120 		   | R128_GMC_BRUSH_SOLID_COLOR
1121 		   | R128_GMC_SRC_DATATYPE_COLOR
1122 		   | R128_ROP[rop].pattern) );
1123 
1124     OUT_RING_REG( R128_DP_BRUSH_FRGD_CLR,  color );
1125     OUT_RING_REG( R128_DP_WRITE_MASK,	   planemask );
1126     OUT_RING_REG( R128_DP_CNTL,		   (R128_DST_X_LEFT_TO_RIGHT |
1127 					    R128_DST_Y_TOP_TO_BOTTOM));
1128     ADVANCE_RING();
1129 }
1130 
1131 /* Subsequent XAA SolidFillRect.
1132 
1133    Tests: xtest CH06/fllrctngl, xterm
1134 */
R128CCESubsequentSolidFillRect(ScrnInfoPtr pScrn,int x,int y,int w,int h)1135 static void R128CCESubsequentSolidFillRect(ScrnInfoPtr pScrn,
1136 					   int x, int y, int w, int h)
1137 {
1138     R128InfoPtr   info = R128PTR(pScrn);
1139     RING_LOCALS;
1140 
1141     R128CCE_REFRESH( pScrn, info );
1142 
1143     BEGIN_RING( 4 );
1144 
1145     OUT_RING_REG( R128_DST_Y_X,          (y << 16) | x );
1146     OUT_RING_REG( R128_DST_WIDTH_HEIGHT, (w << 16) | h );
1147 
1148     ADVANCE_RING();
1149 }
1150 
1151 /* Setup for XAA screen-to-screen copy.
1152 
1153    Tests: xtest CH06/fllrctngl (also tests transparency).
1154 */
R128CCESetupForScreenToScreenCopy(ScrnInfoPtr pScrn,int xdir,int ydir,int rop,unsigned int planemask,int trans_color)1155 static void R128CCESetupForScreenToScreenCopy(ScrnInfoPtr pScrn,
1156 					       int xdir, int ydir, int rop,
1157 					       unsigned int planemask,
1158 					       int trans_color)
1159 {
1160     R128InfoPtr   info = R128PTR(pScrn);
1161     RING_LOCALS;
1162 
1163     R128CCE_REFRESH( pScrn, info );
1164 
1165     info->xdir = xdir;
1166     info->ydir = ydir;
1167 
1168     BEGIN_RING( 6 );
1169 
1170     OUT_RING_REG( R128_DP_GUI_MASTER_CNTL,
1171 		  (info->dp_gui_master_cntl
1172 		   | R128_GMC_BRUSH_NONE
1173 		   | R128_GMC_SRC_DATATYPE_COLOR
1174 		   | R128_ROP[rop].rop
1175 		   | R128_DP_SRC_SOURCE_MEMORY) );
1176 
1177     OUT_RING_REG( R128_DP_WRITE_MASK, planemask );
1178     OUT_RING_REG( R128_DP_CNTL,
1179 		  ((xdir >= 0 ? R128_DST_X_LEFT_TO_RIGHT : 0) |
1180 		   (ydir >= 0 ? R128_DST_Y_TOP_TO_BOTTOM : 0)) );
1181 
1182     ADVANCE_RING();
1183 
1184     if (trans_color != -1) {
1185 	BEGIN_RING( 6 );
1186 
1187 	OUT_RING_REG( R128_CLR_CMP_CLR_SRC, trans_color );
1188 	OUT_RING_REG( R128_CLR_CMP_MASK,    R128_CLR_CMP_MSK );
1189 	OUT_RING_REG( R128_CLR_CMP_CNTL,    (R128_SRC_CMP_NEQ_COLOR |
1190 					     R128_CLR_CMP_SRC_SOURCE) );
1191 
1192 	ADVANCE_RING();
1193     }
1194 }
1195 
1196 /* Subsequent XAA screen-to-screen copy. */
R128CCESubsequentScreenToScreenCopy(ScrnInfoPtr pScrn,int xa,int ya,int xb,int yb,int w,int h)1197 static void R128CCESubsequentScreenToScreenCopy(ScrnInfoPtr pScrn,
1198 						 int xa, int ya,
1199 						 int xb, int yb,
1200 						 int w, int h)
1201 {
1202     R128InfoPtr   info = R128PTR(pScrn);
1203     RING_LOCALS;
1204 
1205     R128CCE_REFRESH( pScrn, info );
1206 
1207     if (info->xdir < 0) xa += w - 1, xb += w - 1;
1208     if (info->ydir < 0) ya += h - 1, yb += h - 1;
1209 
1210     BEGIN_RING( 6 );
1211 
1212     OUT_RING_REG( R128_SRC_Y_X,          (ya << 16) | xa );
1213     OUT_RING_REG( R128_DST_Y_X,          (yb << 16) | xb );
1214     OUT_RING_REG( R128_DST_HEIGHT_WIDTH, (h << 16) | w );
1215 
1216     ADVANCE_RING();
1217 }
1218 
1219 
1220 /*
1221  * XAA scanline color expansion
1222  *
1223  * We use HOSTDATA_BLT CCE packets, dividing the image in chunks that fit into
1224  * the indirect buffer if necessary.
1225  */
/* Setup step of the CCE scanline color expansion.  Only the write mask
   is queued here; the raster op and the two colours are stored in the
   driver state and emitted later with each HOSTDATA_BLT packet. */
static void R128CCESetupForScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
							      int fg, int bg,
							      int rop,
							      unsigned int
							      planemask)
{
    R128InfoPtr   info = R128PTR(pScrn);
    RING_LOCALS;

    R128CCE_REFRESH( pScrn, info );

    /* 2 ring entries for the single register write. */
    BEGIN_RING( 2 );
    OUT_RING_REG( R128_DP_WRITE_MASK, planemask );
    ADVANCE_RING();

    info->scanline_rop = rop;
    info->scanline_fg  = fg;
    info->scanline_bg  = bg;
}
1245 
1246 /* Helper function to write out a HOSTDATA_BLT packet into the indirect buffer
1247    and set the XAA scratch buffer address appropriately */
R128CCEScanlineCPUToScreenColorExpandFillPacket(ScrnInfoPtr pScrn,int bufno)1248 static void R128CCEScanlineCPUToScreenColorExpandFillPacket(ScrnInfoPtr pScrn,
1249 							    int bufno)
1250 {
1251     R128InfoPtr	info = R128PTR(pScrn);
1252     int chunk_words = info->scanline_hpass * info->scanline_words;
1253     RING_LOCALS;
1254 
1255     R128CCE_REFRESH( pScrn, info );
1256 
1257     BEGIN_RING( chunk_words+9 );
1258 
1259     OUT_RING( CCE_PACKET3( R128_CCE_PACKET3_CNTL_HOSTDATA_BLT, chunk_words+9-2 ) );
1260 #if X_BYTE_ORDER == X_LITTLE_ENDIAN
1261     OUT_RING( (info->dp_gui_master_cntl
1262 	       | R128_GMC_DST_CLIPPING
1263 	       | R128_GMC_BRUSH_NONE
1264 	       | (info->scanline_bg == -1
1265 		  ? R128_GMC_SRC_DATATYPE_MONO_FG_LA
1266 		  : R128_GMC_SRC_DATATYPE_MONO_FG_BG)
1267 	       | R128_ROP[info->scanline_rop].rop
1268 	       | R128_GMC_BYTE_LSB_TO_MSB
1269 	       | R128_DP_SRC_SOURCE_HOST_DATA));
1270 #else	/* X_BYTE_ORDER == X_BIG_ENDIAN */
1271     OUT_RING( (info->dp_gui_master_cntl
1272 	       | R128_GMC_DST_CLIPPING
1273 	       | R128_GMC_BRUSH_NONE
1274 	       | (info->scanline_bg == -1
1275 		  ? R128_GMC_SRC_DATATYPE_MONO_FG_LA
1276 		  : R128_GMC_SRC_DATATYPE_MONO_FG_BG)
1277 	       | R128_ROP[info->scanline_rop].rop
1278 	       | R128_DP_SRC_SOURCE_HOST_DATA));
1279 #endif
1280     OUT_RING( (info->scanline_y << 16) | (info->scanline_x1clip & 0xffff) );
1281     OUT_RING( ((info->scanline_y+info->scanline_hpass-1) << 16) | ((info->scanline_x2clip-1) & 0xffff) );
1282     OUT_RING( info->scanline_fg );
1283     OUT_RING( info->scanline_bg );
1284     OUT_RING( (info->scanline_y << 16) | (info->scanline_x & 0xffff));
1285 
1286     /* Have to pad the width here and use clipping engine */
1287     OUT_RING( (info->scanline_hpass << 16)      | ((info->scanline_w + 31) & ~31));
1288 
1289     OUT_RING( chunk_words );
1290 
1291     info->scratch_buffer[bufno] = (unsigned char *) &__head[__count];
1292     __count += chunk_words;
1293 
1294     ADVANCE_RING();
1295 
1296     info->scanline_y += info->scanline_hpass;
1297     info->scanline_h -= info->scanline_hpass;
1298 
1299     if ( R128_VERBOSE )
1300           xf86DrvMsg( pScrn->scrnIndex, X_INFO,
1301 		      "%s: hpass=%d, words=%d => chunk_words=%d, y=%d, h=%d\n",
1302 		      __FUNCTION__, info->scanline_hpass, info->scanline_words,
1303 		      chunk_words, info->scanline_y, info->scanline_h );
1304 }
1305 
1306 /* Subsequent XAA indirect CPU-to-screen color expansion.  This is only
1307    called once for each rectangle. */
R128CCESubsequentScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,int x,int y,int w,int h,int skipleft)1308 static void R128CCESubsequentScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
1309 								int x, int y,
1310 								int w, int h,
1311 								int skipleft)
1312 {
1313     R128InfoPtr   info      = R128PTR(pScrn);
1314 
1315 #define BUFSIZE ( R128_BUFFER_SIZE/4-9 )
1316 
1317     info->scanline_x      = x;
1318     info->scanline_y      = y;
1319     info->scanline_w      = w;
1320     info->scanline_h      = h;
1321 
1322     info->scanline_x1clip = x+skipleft;
1323     info->scanline_x2clip = x+w;
1324 
1325     info->scanline_words  = (w + 31) >> 5;
1326     info->scanline_hpass  = min(h,(BUFSIZE/info->scanline_words));
1327 
1328     if ( R128_VERBOSE )
1329         xf86DrvMsg( pScrn->scrnIndex, X_INFO,
1330 		    "%s: x=%d, y=%d, w=%d, h=%d, skipleft=%d => x1clip=%d, x2clip=%d, hpass=%d, words=%d\n",
1331 		    __FUNCTION__, x, y, w, h, skipleft, info->scanline_x1clip, info->scanline_x2clip,
1332 		    info->scanline_hpass, info->scanline_words );
1333 
1334     R128CCEScanlineCPUToScreenColorExpandFillPacket(pScrn, 0);
1335 }
1336 
1337 /* Subsequent XAA indirect CPU-to-screen color expansion.  This is called
1338    once for each scanline. */
R128CCESubsequentColorExpandScanline(ScrnInfoPtr pScrn,int bufno)1339 static void R128CCESubsequentColorExpandScanline(ScrnInfoPtr pScrn,
1340 						 int bufno)
1341 {
1342     R128InfoPtr     info      = R128PTR(pScrn);
1343 
1344     if ( R128_VERBOSE )
1345         xf86DrvMsg( pScrn->scrnIndex, X_INFO,
1346 		    "%s enter: scanline_hpass=%d, scanline_h=%d\n",
1347 		    __FUNCTION__, info->scanline_hpass, info->scanline_h );
1348 
1349     if (--info->scanline_hpass) {
1350         info->scratch_buffer[bufno] += 4 * info->scanline_words;
1351     }
1352     else if(info->scanline_h) {
1353         info->scanline_hpass = min(info->scanline_h,(BUFSIZE/info->scanline_words));
1354         R128CCEScanlineCPUToScreenColorExpandFillPacket(pScrn, bufno);
1355     }
1356 
1357     if ( R128_VERBOSE )
1358         xf86DrvMsg( pScrn->scrnIndex, X_INFO,
1359 		    "%s exit: scanline_hpass=%d, scanline_h=%d\n",
1360 		    __FUNCTION__, info->scanline_hpass, info->scanline_h );
1361 }
1362 
1363 /* Solid lines */
/* Setup for XAA solid lines: queues the GUI master control word (solid
   colour brush, pattern ROP), the brush foreground colour and the write
   mask on the CCE ring. */
static void R128CCESetupForSolidLine(ScrnInfoPtr pScrn,
				  int color, int rop, unsigned int planemask)
{
    R128InfoPtr   info = R128PTR(pScrn);
    uint32_t      gmc;
    RING_LOCALS;

    R128CCE_REFRESH( pScrn, info );

    /* 6 ring entries for the 3 register writes below. */
    BEGIN_RING( 6 );

    gmc = (info->dp_gui_master_cntl
	   | R128_GMC_BRUSH_SOLID_COLOR
	   | R128_GMC_SRC_DATATYPE_COLOR
	   | R128_ROP[rop].pattern);

    OUT_RING_REG(R128_DP_GUI_MASTER_CNTL, gmc);
    OUT_RING_REG(R128_DP_BRUSH_FRGD_CLR,  color);
    OUT_RING_REG(R128_DP_WRITE_MASK,      planemask);

    ADVANCE_RING();
}
1383 
/* Queues one solid Bresenham line on the CCE ring.  The octant bits are
   translated into the major-axis and direction flags; the error term,
   increment (minor), decrement (-major) and length follow. */
static void R128CCESubsequentSolidBresenhamLine(ScrnInfoPtr pScrn,
					     int x, int y,
					     int major, int minor,
					     int err, int len, int octant)
{
    R128InfoPtr   info  = R128PTR(pScrn);
    int           flags = 0;
    RING_LOCALS;

    R128CCE_REFRESH( pScrn, info );

    /* The direction flags are set when the octant is NOT decreasing. */
    flags |= (octant & YMAJOR)      ? R128_DST_Y_MAJOR : 0;
    flags |= (octant & XDECREASING) ? 0 : R128_DST_X_DIR_LEFT_TO_RIGHT;
    flags |= (octant & YDECREASING) ? 0 : R128_DST_Y_DIR_TOP_TO_BOTTOM;

    /* 12 ring entries for the 6 register writes below. */
    BEGIN_RING( 12 );
    OUT_RING_REG(R128_DP_CNTL_XDIR_YDIR_YMAJOR, flags);
    OUT_RING_REG(R128_DST_Y_X,                  (y << 16) | x);
    OUT_RING_REG(R128_DST_BRES_ERR,             err);
    OUT_RING_REG(R128_DST_BRES_INC,             minor);
    OUT_RING_REG(R128_DST_BRES_DEC,             -major);
    OUT_RING_REG(R128_DST_BRES_LNTH,            len);
    ADVANCE_RING();
}
1410 
/* Draws a horizontal or vertical solid line as a one-pixel-thick filled
   rectangle, after forcing the drawing direction to left-to-right,
   top-to-bottom. */
static void R128CCESubsequentSolidHorVertLine(ScrnInfoPtr pScrn,
					   int x, int y, int len, int dir )
{
    R128InfoPtr   info   = R128PTR(pScrn);
    int           width  = len;
    int           height = 1;
    RING_LOCALS;

    R128CCE_REFRESH( pScrn, info );

    BEGIN_RING( 2 );
    OUT_RING_REG(R128_DP_CNTL, (R128_DST_X_LEFT_TO_RIGHT
				| R128_DST_Y_TOP_TO_BOTTOM));
    ADVANCE_RING();

    /* Anything other than DEGREES_0 is drawn as a vertical line. */
    if (dir != DEGREES_0) {
	width  = 1;
	height = len;
    }

    R128CCESubsequentSolidFillRect(pScrn, x, y, width, height);
}
1432 
1433 /* Dashed lines */
/* Setup for XAA dashed lines.  The first 32 bits of the dash pattern are
   read and, for pattern lengths of 2, 4, 8 or 16 bits, replicated to
   fill the whole 32-bit brush word.  The brush, write mask and colours
   are then queued on the CCE ring.  A background of -1 selects the
   32x1 MONO_FG_LA brush, any other background the MONO_FG_BG one. */
static void R128CCESetupForDashedLine(ScrnInfoPtr pScrn,
				   int fg, int bg,
				   int rop, unsigned int planemask,
				   int length, unsigned char *pattern)
{
    R128InfoPtr   info  = R128PTR(pScrn);
    uint32_t      brush = *(uint32_t *)(pointer)pattern;
    uint32_t      gmc;
    RING_LOCALS;

    R128CCE_REFRESH( pScrn, info );

/* The replication direction depends on the host byte order. */
#if X_BYTE_ORDER == X_LITTLE_ENDIAN
# define PAT_SHIFT(pat,n) pat << n
#else
# define PAT_SHIFT(pat,n) pat >> n
#endif

    /* Each case doubles the pattern width and falls into the next one. */
    switch (length) {
    case  2:
	brush |= PAT_SHIFT(brush,2);
	/* fall through */
    case  4:
	brush |= PAT_SHIFT(brush,4);
	/* fall through */
    case  8:
	brush |= PAT_SHIFT(brush,8);
	/* fall through */
    case 16:
	brush |= PAT_SHIFT(brush,16);
    }

    /* 10 ring entries for the 5 register writes below. */
    BEGIN_RING( 10 );

    gmc = (info->dp_gui_master_cntl
	   | R128_ROP[rop].pattern
	   | R128_GMC_BYTE_LSB_TO_MSB);
    if (bg == -1)
	gmc |= R128_GMC_BRUSH_32x1_MONO_FG_LA;
    else
	gmc |= R128_GMC_BRUSH_32x1_MONO_FG_BG;

    OUT_RING_REG(R128_DP_GUI_MASTER_CNTL, gmc);
    OUT_RING_REG(R128_DP_WRITE_MASK,      planemask);
    OUT_RING_REG(R128_DP_BRUSH_FRGD_CLR,  fg);
    OUT_RING_REG(R128_DP_BRUSH_BKGD_CLR,  bg);
    OUT_RING_REG(R128_BRUSH_DATA0,        brush);

    ADVANCE_RING();
}
1473 
/* Queues one dashed Bresenham line on the CCE ring.  Same as the solid
   variant, plus the brush origin, which is set to `phase` in both the
   x and the y field. */
static void R128CCESubsequentDashedBresenhamLine(ScrnInfoPtr pScrn,
					      int x, int y,
					      int major, int minor,
					      int err, int len, int octant,
					      int phase)
{
    R128InfoPtr   info  = R128PTR(pScrn);
    int           flags = 0;
    RING_LOCALS;

    R128CCE_REFRESH( pScrn, info );

    /* The direction flags are set when the octant is NOT decreasing. */
    flags |= (octant & YMAJOR)      ? R128_DST_Y_MAJOR : 0;
    flags |= (octant & XDECREASING) ? 0 : R128_DST_X_DIR_LEFT_TO_RIGHT;
    flags |= (octant & YDECREASING) ? 0 : R128_DST_Y_DIR_TOP_TO_BOTTOM;

    /* 14 ring entries for the 7 register writes below. */
    BEGIN_RING( 14 );
    OUT_RING_REG(R128_DP_CNTL_XDIR_YDIR_YMAJOR, flags);
    OUT_RING_REG(R128_DST_Y_X,                  (y << 16) | x);
    OUT_RING_REG(R128_BRUSH_Y_X,                (phase << 16) | phase);
    OUT_RING_REG(R128_DST_BRES_ERR,             err);
    OUT_RING_REG(R128_DST_BRES_INC,             minor);
    OUT_RING_REG(R128_DST_BRES_DEC,             -major);
    OUT_RING_REG(R128_DST_BRES_LNTH,            len);
    ADVANCE_RING();
}
1502 
1503 /* Mono 8x8 pattern color expansion */
/* Setup for XAA mono 8x8 pattern fills.  patternx and patterny are
   written to the two brush data registers.  A background of -1 selects
   the 8x8 MONO_FG_LA brush, any other background the MONO_FG_BG one. */
static void R128CCESetupForMono8x8PatternFill(ScrnInfoPtr pScrn,
					   int patternx, int patterny,
					   int fg, int bg, int rop,
					   unsigned int planemask)
{
    R128InfoPtr   info = R128PTR(pScrn);
    uint32_t      gmc;
    RING_LOCALS;

    R128CCE_REFRESH( pScrn, info );

    /* 12 ring entries for the 6 register writes below. */
    BEGIN_RING( 12 );

    gmc = (info->dp_gui_master_cntl
	   | R128_ROP[rop].pattern
	   | R128_GMC_BYTE_LSB_TO_MSB);
    if (bg == -1)
	gmc |= R128_GMC_BRUSH_8X8_MONO_FG_LA;
    else
	gmc |= R128_GMC_BRUSH_8X8_MONO_FG_BG;

    OUT_RING_REG(R128_DP_GUI_MASTER_CNTL, gmc);
    OUT_RING_REG(R128_DP_WRITE_MASK,      planemask);
    OUT_RING_REG(R128_DP_BRUSH_FRGD_CLR,  fg);
    OUT_RING_REG(R128_DP_BRUSH_BKGD_CLR,  bg);
    OUT_RING_REG(R128_BRUSH_DATA0,        patternx);
    OUT_RING_REG(R128_BRUSH_DATA1,        patterny);

    ADVANCE_RING();
}
1530 
/* Fills one rectangle with the mono 8x8 pattern set up earlier.  Here
   patternx/patterny are the brush origin and are packed into
   R128_BRUSH_Y_X as (patterny << 8) | patternx. */
static void R128CCESubsequentMono8x8PatternFillRect(ScrnInfoPtr pScrn,
						 int patternx, int patterny,
						 int x, int y, int w, int h)
{
    R128InfoPtr   info         = R128PTR(pScrn);
    uint32_t      brush_origin = (patterny << 8) | patternx;
    uint32_t      dst_origin   = (y << 16) | x;
    uint32_t      dst_extent   = (h << 16) | w;
    RING_LOCALS;

    R128CCE_REFRESH( pScrn, info );

    /* 6 ring entries for the 3 register writes below. */
    BEGIN_RING( 6 );
    OUT_RING_REG(R128_BRUSH_Y_X,        brush_origin);
    OUT_RING_REG(R128_DST_Y_X,          dst_origin);
    OUT_RING_REG(R128_DST_HEIGHT_WIDTH, dst_extent);
    ADVANCE_RING();
}
1548 #endif
1549 
1550 /* Get an indirect buffer for the CCE 2D acceleration commands.
1551  */
R128CCEGetBuffer(ScrnInfoPtr pScrn)1552 drmBufPtr R128CCEGetBuffer( ScrnInfoPtr pScrn )
1553 {
1554     R128InfoPtr   info = R128PTR(pScrn);
1555     drmDMAReq dma;
1556     drmBufPtr buf = NULL;
1557     int indx = 0;
1558     int size = 0;
1559     int ret, i = 0;
1560 
1561 #if 0
1562     /* FIXME: pScrn->pScreen has not been initialized when this is first
1563        called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
1564        the screen index from pScrn, which is initialized, and then get
1565        the screen from screenInfo.screens[index], but that is a hack. */
1566     dma.context = DRIGetContext(pScrn->pScreen);
1567 #else
1568     dma.context = 0x00000001; /* This is the X server's context */
1569 #endif
1570     dma.send_count = 0;
1571     dma.send_list = NULL;
1572     dma.send_sizes = NULL;
1573     dma.flags = 0;
1574     dma.request_count = 1;
1575     dma.request_size = R128_BUFFER_SIZE;
1576     dma.request_list = &indx;
1577     dma.request_sizes = &size;
1578     dma.granted_count = 0;
1579 
1580     while ( 1 ) {
1581 	do {
1582 	    ret = drmDMA( info->drmFD, &dma );
1583 	    if ( ret && ret != -EAGAIN ) {
1584 		xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
1585 			    "%s: CCE GetBuffer %d\n", __FUNCTION__, ret );
1586 	    }
1587 	} while ( ( ret == -EAGAIN ) && ( i++ < R128_TIMEOUT ) );
1588 
1589 	if ( ret == 0 ) {
1590 	    buf = &info->buffers->list[indx];
1591 	    buf->used = 0;
1592 	    if ( R128_VERBOSE ) {
1593 		xf86DrvMsg( pScrn->scrnIndex, X_INFO,
1594 			    "   GetBuffer returning %d\n", buf->idx );
1595 	    }
1596 	    return buf;
1597 	}
1598 
1599 	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
1600 		    "GetBuffer timed out, resetting engine...\n");
1601 	R128EngineReset( pScrn );
1602 	/* R128EngineRestore( pScrn ); FIXME ??? */
1603 
1604 	/* Always restart the engine when doing CCE 2D acceleration */
1605 	R128CCE_RESET( pScrn, info );
1606 	R128CCE_START( pScrn, info );
1607     }
1608 }
1609 
1610 /* Flush the indirect buffer to the kernel for submission to the card.
1611  */
R128CCEFlushIndirect(ScrnInfoPtr pScrn,int discard)1612 void R128CCEFlushIndirect( ScrnInfoPtr pScrn, int discard )
1613 {
1614     R128InfoPtr   info = R128PTR(pScrn);
1615     drmBufPtr buffer = info->indirectBuffer;
1616     int start = info->indirectStart;
1617     drmR128Indirect indirect;
1618 
1619     if ( !buffer )
1620 	return;
1621 
1622     if ( (start == buffer->used) && !discard )
1623         return;
1624 
1625     indirect.idx = buffer->idx;
1626     indirect.start = start;
1627     indirect.end = buffer->used;
1628     indirect.discard = discard;
1629 
1630     drmCommandWriteRead( info->drmFD, DRM_R128_INDIRECT,
1631                          &indirect, sizeof(drmR128Indirect));
1632 
1633     if ( discard )
1634         buffer = info->indirectBuffer = R128CCEGetBuffer( pScrn );
1635 
1636     /* pad to an even number of dwords */
1637     if (buffer->used & 7)
1638         buffer->used = ( buffer->used+7 ) & ~7;
1639 
1640     info->indirectStart = buffer->used;
1641 }
1642 
1643 /* Flush and release the indirect buffer.
1644  */
R128CCEReleaseIndirect(ScrnInfoPtr pScrn)1645 void R128CCEReleaseIndirect( ScrnInfoPtr pScrn )
1646 {
1647     R128InfoPtr   info = R128PTR(pScrn);
1648     drmBufPtr buffer = info->indirectBuffer;
1649     int start = info->indirectStart;
1650     drmR128Indirect indirect;
1651 
1652     info->indirectBuffer = NULL;
1653     info->indirectStart = 0;
1654 
1655     if ( !buffer )
1656 	return;
1657 
1658     indirect.idx = buffer->idx;
1659     indirect.start = start;
1660     indirect.end = buffer->used;
1661     indirect.discard = 1;
1662 
1663     drmCommandWriteRead( info->drmFD, DRM_R128_INDIRECT,
1664                          &indirect, sizeof(drmR128Indirect));
1665 }
1666 
1667 #ifdef HAVE_XAA_H
1668 /* This callback is required for multihead cards using XAA */
1669 static
R128RestoreCCEAccelState(ScrnInfoPtr pScrn)1670 void R128RestoreCCEAccelState(ScrnInfoPtr pScrn)
1671 {
1672     R128InfoPtr info        = R128PTR(pScrn);
1673 /*    unsigned char *R128MMIO = info->MMIO;  needed for OUTREG below */
1674     /*xf86DrvMsg(pScrn->scrnIndex, X_INFO, "===>RestoreCP\n");*/
1675 
1676     R128WaitForFifo(pScrn, 1);
1677 /* is this needed on r128
1678     OUTREG( R128_DEFAULT_OFFSET, info->frontPitchOffset);
1679 */
1680     R128WaitForIdle(pScrn);
1681 
1682     /* FIXME: May need to restore other things,
1683        like BKGD_CLK FG_CLK...*/
1684 
1685 }
1686 
/* Fill in the XAA info record with the CCE-based acceleration hooks.
 *
 * pScrn: screen being initialised; its depth selects the copy flags.
 * a:     XAA info record to populate.
 */
static void R128CCEAccelInit(ScrnInfoPtr pScrn, XAAInfoRecPtr a)
{
    R128InfoPtr info = R128PTR(pScrn);

    a->Flags = (PIXMAP_CACHE | OFFSCREEN_PIXMAPS | LINEAR_FRAMEBUFFER);

    /* Sync */
    a->Sync = R128CCEWaitForIdle;

    /* Solid Filled Rectangle */
    a->SetupForSolidFill       = R128CCESetupForSolidFill;
    a->SubsequentSolidFillRect = R128CCESubsequentSolidFillRect;
    a->PolyFillRectSolidFlags  = 0;

    /* Screen-to-screen Copy
     *
     * Transparency uses the wrong colors for 24 bpp mode -- the
     * transparent part is correct, but the opaque color is wrong.  This
     * can be seen with netscape's I-bar cursor when editing in the URL
     * location box.  Transparency is therefore disabled at 24 bpp.
     */
    if (pScrn->bitsPerPixel == 24)
        a->ScreenToScreenCopyFlags = NO_TRANSPARENCY;
    else
        a->ScreenToScreenCopyFlags = 0;
    a->SetupForScreenToScreenCopy   = R128CCESetupForScreenToScreenCopy;
    a->SubsequentScreenToScreenCopy = R128CCESubsequentScreenToScreenCopy;

    /* Indirect CPU-To-Screen Color Expand
     *
     * No scratch memory is allocated on this path: slot 0 of the buffer
     * table starts out NULL (presumably pointed at real storage later by
     * the color-expand hooks -- TODO confirm).
     */
    info->scratch_buffer[0]          = NULL;
    a->ScanlineColorExpandBuffers    = info->scratch_buffer;
    a->NumScanlineColorExpandBuffers = 1;
    a->ScanlineCPUToScreenColorExpandFillFlags =
        LEFT_EDGE_CLIPPING | LEFT_EDGE_CLIPPING_NEGATIVE_X;
    a->SetupForScanlineCPUToScreenColorExpandFill =
        R128CCESetupForScanlineCPUToScreenColorExpandFill;
    a->SubsequentScanlineCPUToScreenColorExpandFill =
        R128CCESubsequentScanlineCPUToScreenColorExpandFill;
    a->SubsequentColorExpandScanline =
        R128CCESubsequentColorExpandScanline;

    /* Bresenham Solid Lines */
    a->SetupForSolidLine            = R128CCESetupForSolidLine;
    a->SubsequentSolidBresenhamLine = R128CCESubsequentSolidBresenhamLine;
    a->SubsequentSolidHorVertLine   = R128CCESubsequentSolidHorVertLine;

    /* Bresenham Dashed Lines */
    a->SetupForDashedLine            = R128CCESetupForDashedLine;
    a->SubsequentDashedBresenhamLine = R128CCESubsequentDashedBresenhamLine;
    a->DashPatternMaxLength          = 32;
    a->DashedLineFlags = (LINE_PATTERN_LSBFIRST_LSBJUSTIFIED
                          | LINE_PATTERN_POWER_OF_2_ONLY);

    /* Mono 8x8 Pattern Fill (Color Expand) */
    a->SetupForMono8x8PatternFill = R128CCESetupForMono8x8PatternFill;
    a->SubsequentMono8x8PatternFillRect =
        R128CCESubsequentMono8x8PatternFillRect;
    a->Mono8x8PatternFillFlags = (HARDWARE_PATTERN_PROGRAMMED_BITS
                                  | HARDWARE_PATTERN_PROGRAMMED_ORIGIN
                                  | HARDWARE_PATTERN_SCREEN_ORIGIN
                                  | BIT_ORDER_IN_BYTE_LSBFIRST);

    /* A shared entity gets the state-restore callback. */
    if (xf86IsEntityShared(info->pEnt->index)) {
        a->RestoreAccelState = R128RestoreCCEAccelState;
    }
}
1752 #endif
1753 #endif
1754 
1755 #ifdef HAVE_XAA_H
1756 /* This callback is required for multihead cards using XAA */
1757 static
R128RestoreAccelState(ScrnInfoPtr pScrn)1758 void R128RestoreAccelState(ScrnInfoPtr pScrn)
1759 {
1760     R128InfoPtr info        = R128PTR(pScrn);
1761     unsigned char *R128MMIO = info->MMIO;
1762 
1763     R128WaitForFifo(pScrn, 2);
1764     OUTREG(R128_DEFAULT_OFFSET, pScrn->fbOffset);
1765     OUTREG(R128_DEFAULT_PITCH,  info->pitch);
1766 
1767     /* FIXME: May need to restore other things,
1768        like BKGD_CLK FG_CLK...*/
1769 
1770     R128WaitForIdle(pScrn);
1771 
1772 }
1773 
/* Fill in the XAA info record with the MMIO (non-CCE) acceleration hooks.
 *
 * pScrn: screen being initialised; its depth and virtual width are used.
 * a:     XAA info record to populate.
 *
 * A scanline scratch buffer is allocated here and stored in
 * info->scratch_save / info->scratch_buffer[0].  The scanline color
 * expansion and scanline image write hooks both transfer data through
 * that buffer, so they are only installed when the allocation succeeds;
 * on failure those hooks are left unset rather than advertising a NULL
 * buffer to XAA.
 */
static void R128MMIOAccelInit(ScrnInfoPtr pScrn, XAAInfoRecPtr a)
{
    R128InfoPtr info = R128PTR(pScrn);

    a->Flags                            = (PIXMAP_CACHE
                                           | OFFSCREEN_PIXMAPS
                                           | LINEAR_FRAMEBUFFER);

    /* Sync */
    a->Sync                             = R128WaitForIdle;

    /* Solid Filled Rectangle */
    a->PolyFillRectSolidFlags           = 0;
    a->SetupForSolidFill                = R128SetupForSolidFill;
    a->SubsequentSolidFillRect          = R128SubsequentSolidFillRect;

    /* Screen-to-screen Copy
     *
     * Transparency uses the wrong colors for 24 bpp mode -- the
     * transparent part is correct, but the opaque color is wrong.  This
     * can be seen with netscape's I-bar cursor when editing in the URL
     * location box.
     */
    a->ScreenToScreenCopyFlags          = ((pScrn->bitsPerPixel == 24)
                                           ? NO_TRANSPARENCY
                                           : 0);
    a->SetupForScreenToScreenCopy       = R128SetupForScreenToScreenCopy;
    a->SubsequentScreenToScreenCopy     = R128SubsequentScreenToScreenCopy;

    /* Mono 8x8 Pattern Fill (Color Expand) */
    a->SetupForMono8x8PatternFill       = R128SetupForMono8x8PatternFill;
    a->SubsequentMono8x8PatternFillRect = R128SubsequentMono8x8PatternFillRect;
    a->Mono8x8PatternFillFlags          = (HARDWARE_PATTERN_PROGRAMMED_BITS
                                           | HARDWARE_PATTERN_PROGRAMMED_ORIGIN
                                           | HARDWARE_PATTERN_SCREEN_ORIGIN
                                           | BIT_ORDER_IN_BYTE_LSBFIRST);

    /* Bresenham Solid Lines */
    a->SetupForSolidLine                = R128SetupForSolidLine;
    a->SubsequentSolidBresenhamLine     = R128SubsequentSolidBresenhamLine;
    a->SubsequentSolidHorVertLine       = R128SubsequentSolidHorVertLine;

    /* Bresenham Dashed Lines */
    a->SetupForDashedLine               = R128SetupForDashedLine;
    a->SubsequentDashedBresenhamLine    = R128SubsequentDashedBresenhamLine;
    a->DashPatternMaxLength             = 32;
    a->DashedLineFlags                  = (LINE_PATTERN_LSBFIRST_LSBJUSTIFIED
                                           | LINE_PATTERN_POWER_OF_2_ONLY);

    /* Scratch buffer shared by the scanline hooks below: virtualX bits
     * rounded up to whole 32-bit words, plus virtualX pixels of
     * CurrentLayout.pixel_bytes each. */
    info->scratch_save      = malloc(((pScrn->virtualX + 31) / 32 * 4)
                                     + (pScrn->virtualX
                                        * info->CurrentLayout.pixel_bytes));
    info->scratch_buffer[0] = info->scratch_save;

    if (info->scratch_save) {
        /* Indirect CPU-To-Screen Color Expand */
        a->ScanlineCPUToScreenColorExpandFillFlags
            = LEFT_EDGE_CLIPPING | LEFT_EDGE_CLIPPING_NEGATIVE_X;
        a->NumScanlineColorExpandBuffers    = 1;
        a->ScanlineColorExpandBuffers       = info->scratch_buffer;
        a->SetupForScanlineCPUToScreenColorExpandFill
            = R128SetupForScanlineCPUToScreenColorExpandFill;
        a->SubsequentScanlineCPUToScreenColorExpandFill
            = R128SubsequentScanlineCPUToScreenColorExpandFill;
        a->SubsequentColorExpandScanline    = R128SubsequentColorExpandScanline;

        /* ImageWrite */
        a->NumScanlineImageWriteBuffers     = 1;
        a->ScanlineImageWriteBuffers        = info->scratch_buffer;
        a->SetupForScanlineImageWrite       = R128SetupForScanlineImageWrite;
        a->SubsequentScanlineImageWriteRect = R128SubsequentScanlineImageWriteRect;
        a->SubsequentImageWriteScanline     = R128SubsequentImageWriteScanline;
        a->ScanlineImageWriteFlags          = CPU_TRANSFER_PAD_DWORD
                /* Performance tests show that we shouldn't use GXcopy
                 * for uploads as a memcpy is faster */
                                              | NO_GXCOPY
                                              | LEFT_EDGE_CLIPPING
                                              | LEFT_EDGE_CLIPPING_NEGATIVE_X
                                              | SCANLINE_PAD_DWORD;
    }
    /* else: allocation failed; the scanline hooks stay unset so that no
     * NULL buffer is ever handed to XAA's scanline paths. */

    /* If more than one device shares this entity, we have to assign
     * this callback, otherwise XAA will be disabled. */
    if (xf86IsEntityShared(info->pEnt->index)
        && xf86GetNumEntityInstances(info->pEnt->index) > 1) {
        a->RestoreAccelState = R128RestoreAccelState;
    }
}
1863 #endif
1864 
R128CopySwap(uint8_t * dst,uint8_t * src,unsigned int size,int swap)1865 void R128CopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
1866 {
1867     switch(swap) {
1868     case APER_0_BIG_ENDIAN_32BPP_SWAP:
1869 	{
1870 	    unsigned int *d = (unsigned int *)dst;
1871 	    unsigned int *s = (unsigned int *)src;
1872 	    unsigned int nwords = size >> 2;
1873 
1874 	    for (; nwords > 0; --nwords, ++d, ++s)
1875 #ifdef __powerpc__
1876 		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
1877 #else
1878 		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
1879 			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
1880 #endif
1881 	    return;
1882 	}
1883     case APER_0_BIG_ENDIAN_16BPP_SWAP:
1884 	{
1885 	    unsigned short *d = (unsigned short *)dst;
1886 	    unsigned short *s = (unsigned short *)src;
1887 	    unsigned int nwords = size >> 1;
1888 
1889 	    for (; nwords > 0; --nwords, ++d, ++s)
1890 #ifdef __powerpc__
1891 		asm volatile("sthbrx %0,0,%1" : : "r" (*s), "r" (d));
1892 #else
1893 	        *d = (*s >> 8) | (*s << 8);
1894 #endif
1895 	    return;
1896 	}
1897     }
1898     if (src != dst)
1899 	memcpy(dst, src, size);
1900 }
1901 
1902 /* Initialize XAA for supported acceleration and also initialize the
1903    graphics hardware for acceleration. */
1904 #ifdef HAVE_XAA_H
1905 Bool
R128XAAAccelInit(ScreenPtr pScreen)1906 R128XAAAccelInit(ScreenPtr pScreen)
1907 {
1908     ScrnInfoPtr   pScrn = xf86ScreenToScrn(pScreen);
1909     R128InfoPtr   info  = R128PTR(pScrn);
1910     XAAInfoRecPtr a;
1911 
1912     if (!(a = info->accel = XAACreateInfoRec())) return FALSE;
1913 
1914 #ifdef R128DRI
1915     if (info->directRenderingEnabled)
1916         R128CCEAccelInit(pScrn, a);
1917     else
1918 #endif
1919     R128MMIOAccelInit(pScrn, a);
1920 
1921     R128EngineInit(pScrn);
1922     return XAAInit(pScreen, a);
1923 }
1924 #endif
1925