; xref: /netbsd/sys/arch/amiga/dev/grf_ultms.g (revision bf9ec67e)
; $NetBSD: grf_ultms.g,v 1.7 2002/05/26 12:53:22 jdolecek Exp $
;
; ite support for A2410.

;
; Copyright (c) 1995 Ignatios Souvatzis.
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; 1. Redistributions of source code must retain the above copyright
;    notice, this list of conditions and the following disclaimer.
; 2. Redistributions in binary form must reproduce the above copyright
;    notice, this list of conditions and the following disclaimer in the
;    documentation and/or other materials provided with the distribution.
; 3. All advertising materials mentioning features or use of this software
;    must display the following acknowledgement:
;	This product contains software developed by Ignatios Souvatzis
;	for the NetBSD project.
; 4. The name of the author may not be used to endorse or promote products
;    derived from this software without specific prior written permission
;
; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

; This file contains the source code for grf_ultmscode.c. It is
; assembler code for the TMS34010 CPU/graphics processor.
;
; Use Paul Mackerras' gspa assembler to transfer it to hex format, then
; Ignatios Souvatzis' gspahextoc utility to convert it to grf_ultmscode.c.
;
; Use 'make grf_ultms.c' to generate the .c file.

; memory map:
; FF800000 .. FF9FFFFF	overlay planes
; FFA00000 .. FFA0FFFF	ite support code
; FFA10000 .. FFA1FFFF	ite support, input queue
; FFA20000 .. FFA2FEFF	variables
; FFA2FF00 .. FFA2FFFF	variables, X server
; FFA30000 .. FFA3FFFF	font data
; FFA40000 .. FFA4FFFF	font data, bold
; FFA50000 .. FFA5FFFF	X server, input queue
; FFA60000 .. FFFFC000	X server, onboard pixmaps

; Start of data area
;
; Communication block shared with the host.  Addresses in this file are
; TMS34010 bit addresses, which is why the 64 kbit input ring below is
; 8 kbytes long.  The layout starting at `d' is positional: put, get,
; gsp_mode and xproc follow each other without gaps, and put/get must
; sit at $FFA20000.
	.org	$FFA20000
d:

;
; Ring buffer for getting stuff from host
; Data buffer:
inbuf	=	$FFA10000	; 64kbits here (8k bytes)
;
; Pointers: (these must be at address $FFA20000)
; The main loop compares `get' against `put' to detect an empty ring and
; stores `get' back after every command it has consumed.
put:		.long	inbuf
get:		.long	inbuf

;
; Mode bits for communication between GSP and CPU
;
; GSP mode bits: set by CPU, control GSP operation
; These are bit numbers within gsp_mode.  Of these, the main loop in this
; file only tests GSP_CALL_X.
GSP_HOLD =	0
GSP_FLUSH =	1
GSP_ALT_SCRN =	2
GSP_DISP_CTRL =	3
GSP_NO_CURSOR =	4
GSP_CALL_X =	5
gsp_mode:	.word	0

;
; Pointer to X operation routine
; The main loop calls through this pointer on every pass while GSP_CALL_X
; is set in gsp_mode.
xproc:		.long	0

; We leave the next few words for future communication requirements

85		.org	d+0x100
86;
87; Other data:
88magic:		.blkl	1		; set => screen already inited
89MAGIC =		0xD0D0BEAC
90
91screen_width:	.word	1024
92screen_height:	.word	768
93screen_origin:	.long	$FE000000	; just a placeholder
94screen_pitch:	.word	8192		; 1024*8
95pixel_size:	.word	8
96
97		.org	d+0x200
98font_adr:
99;
100; Font information is stored in the structure defined declared below.
101;
102bitmap_ptrs:	.long	$FFA30000	; points to first bitmap
103font_size:	.long	$00080008	; Y:X bitmap size
104under_row:	.word	6		; row # for underlines
105under_ht:	.word	1		; thickness of underline
106first_char:	.word	32		; first and last char in font
107last_char:	.word	255		;
108bold_smear:	.word	1		; for making bold fonts
109
110bgcolor:	.long	0		; background color
111fgcolor:	.long	$01010101	; foreground color
112;precomputed out of what the host gave us:
113font_area:	.word	64		; in pixels
114font_pitch:	.word	8
115font_lmo:	.word	28
116
117
; Control register addresses
; (TMS34010 I/O registers; the code in this file accesses control, convsp,
; convdp and psize)
hesync	=	$c0000000
dpyctl	=	$c0000080
control	=	$c00000b0
convsp	=	$c0000130
convdp	=	$c0000140
psize	=	$c0000150

;
; Bits in control register
; T|W|PBH|PBV|PPOP together are $7FE0, the mask cleared during
; initialization.  PPOP is the 5-bit field starting at bit 10.
T	=	$20		; enable transparency
W	=	$C0		; window options
PBH	=	$100		; pixblt horiz dirn
PBV	=	$200		; pixblt vertical dirn
PPOP	=	$7C00		; pixel processing options

;
; Bits in dpyctl register
SRT	=	$800		; do serial register transfers

; Still in the data area: free_memory holds the address of the first
; location after the variables declared above.
free_memory:	.long	free_memory_start
free_memory_start:		; allocate dynamic arrays from here

;
; Program starts here.
	.org	$FFA00000
	.start	.

;
; initialization
;
; Field 0 is set to 16 bits and field 1 to 32 bits, both zero-extended.
; All later moves select their width with the trailing ,0 / ,1 operand and
; depend on this setup.
	setf	16,0,0		; just in case
	setf	32,0,1
	move	$fffff000,sp

; Set up sync, blank parameters
; done by host through interface

; set up overlay clut:
; Write 0 to $fe800000, then twelve values through $fe800030: three times
; 128 followed by nine zeros.  (Going by the load-clut command further
; down, $fe800000 takes the entry index and $fe800030 the overlay colour
; components, three per entry.)
	move	$0,a0
	move	a0,@$fe800000
	move	$fe800030,a1
	move	128,a0
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	0,a0
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1

; set up overlay planes:
	move	6,a0
	move	a0,@$fe800000
	move	$0b,a0
	move	a0,@$fe800020

; set up global registers
; B3 = destination pitch, B4 = screen origin (offset), B8 = COLOR0.
; CONVDP is the LMO of the pitch.
	move	@screen_pitch,b3,0
	move	@screen_origin,b4,1
	move	@bgcolor,b8,1
	lmo	b3,b0
	move	b0,@convdp,0
	move	@control,a0,0
	andn	$7FE0,a0		; clear PPOP, PBV, PBH, W, T fields
	move	a0,@control,0
	move	@pixel_size,a0,0
	move	a0,@psize,0
	move	@psize,a0,0		; read back (presumably to let the write settle)


; clear the entire screen
; Linear fill starting at the screen origin, colour 0, with the combined
; width/height longword as DYDX.
	move	b4,b2
	move	0,b9
	move	@screen_width,b7,1
	fill	l

; (unreferenced numeric label, kept as found in the original source)
4:
; main stuff...
;
; Command fetch and dispatch.
;	a0 = current read position in the input ring (mirrors `get')
;	a1 = header word of the current command with the low 4 bits cleared;
;	     loop_end adds this to a0 to step to the next command
;	a2 = low 4 bits of the header word = opcode
; Every handler must leave a0 and a1 intact and finish by jumping to
; loop_end.
	move	@get,a0,1
	jruc	main_loop
loop_end:
	clr	a4
	move	a4,*a0,0		; zero the header word just processed
	addxy	a1,a0			; X/Y halves add separately, so the
					; pointer wraps inside its low 16 bits
	move	a0,@get,1
main_loop:
	move	@gsp_mode,a1,0
	btst	GSP_CALL_X,a1
	jreq	main_loop_1

; GSP_CALL_X is set: call the routine at xproc, preserving a0-a3.
	mmtm	sp,a0,a1,a2,a3
	move	@xproc,a4,1
	call	a4
	mmfm	sp,a0,a1,a2,a3

main_loop_1:
	move	@put,a3,1
	move	*a0,a1,0

	move	a1,a2
	andi	$FFF0,a1
	jrz	main_loop		; no length yet: nothing to do

	sub	a0,a3
	jreq	main_loop		; put == get: ring is empty
continue:
	andi	$F,a2
	jrz	loop_end		; opcode 0: just skip the entry
; Opcode dispatch continues: a2 holds the opcode, counted down one per test.
	dec	a2
	jrnz	testfor2
; op 1 - char
; Command words after the header, in order: char code, "fg", "bg",
; y:x position (32 bits), flags.
; Flag bits: 0 = swap the two colours, 1 = underline, 2 = use bold font.
; NOTE(review): PIXBLT B,XY and FILL draw with COLOR1 (b9), and the
; initialization code loads bgcolor into b8; confirm against the host side
; that the word commented "fg" really belongs in b8.
	movk	6,b10
	move	b10,@$fe800000,0
	movk	1,b10
	move	b10,@$fe800020,0

	move	a0,b10
	move	*b10+,b12,0	; dummy move (faster than addk)
	move	*b10+,b12,0	; char code
	move	@first_char,b11,0
	sub	b11,b12		; minus first char in font
	move	@font_size,b7,1	;dydx - char size->pixel array dimensions
	move	@font_pitch,b1
	move	@font_lmo,b0
	move	b0,@convsp,0
	move	@font_area,b11

	mpyu	b12,b11		; times char offset
	move	@font_adr,b0,1	; font bitmaps base
	add	b11,b0		; character bitmap start addr. linear

	move	*b10+,b8,0	; fg
	move	*b10+,b9,0	; bg
	move	*b10+,b2,1	; y:x

	move	*b10+,b11,0	; flags
	move	b11,a4
	btst	0,a4
	jreq	noinv
	move	b8,b11		; exchange b8 and b9 via b11
	move	b9,b8
	move	b11,b9
noinv:
	btst	2,a4
	jreq	nobold
	addi	$10000,b0	; bold copy lives $10000 above the plain font
nobold:
	move	b2,a5		; keep the position for the underline below
	pixblt	b,xy
	move	a5,b2

	btst	1,a4
	jreq	noul
	move	@under_row,b11,0
	sll	16,b11		; shift into Y half
	add	b11,b2
	move	@under_ht,b11,0
	sll	16,b11		; shift into Y half
	movy	b11,b7		; and move Y half only
	fill	xy
noul:
	jruc	loop_end
testfor2:
	dec	a2
	jrnz	testfor3
; op 2 - fill
; Command words after the header, in order: colour, XY start (32 bits),
; dydx (32 bits), pixel processing option.
; The current control register is pushed, the option is written into the
; 5-bit field at control+10 (PPOP), the fill runs, and control is restored.
	move	a0,b10
	move	*b10+,b9,0	; dummy move
	move	*b10+,b9,0	; color
	move	*b10+,b2,1	; XY start address
	move	*b10+,b7,1	; dydx

	move	@control,b0,0
	move	b0,*-sp
	move	*b10+,b0
	setf	5,0,0		; field 0 temporarily 5 bits wide
	move	b0,@control+10
	setf	16,0,0		; back to the normal 16-bit field 0
	move	@control,b0,0	; read back (presumably to let the write settle)

	fill	xy

	move	*sp+,b0
	move	b0,@control,0
	jruc	loop_end,l

testfor3:
	dec	a2
	jrnz	testfor4
; op 3 - pixblt
; Command words after the header, in order: XY source, dydx, XY
; destination (32 bits each).  Source and destination share pitch and
; conversion factor.
; PBH/PBV are cleared and then set from the source/destination comparison:
; the carry from cmpxy decides the vertical bit, the overflow flag the
; horizontal one (per the yok/xok labels).
; NOTE(review): control is not restored afterwards, so PBH/PBV stay as set
; here until the next op 3 - confirm nothing else depends on them.
	move	a0,b10
	move	@convdp,@convsp,0
	move	*b10+,b0,0	; dummy move
	move	*b10+,b0,1	; XY src
	move	*b10+,b7,1	; dydx
	move	*b10+,b2,1	; XY dst
	move	b3,b1
	move	@control,b11,0
	andni	PBH|PBV,b11
	cmpxy	b0,b2
	jrc	yok
	ori	PBV,b11
yok:	jrv	xok
	ori	PBH,b11
xok:	move	b11,@control,0
	move	@control,b11,0	; read back (presumably to let the write settle)

	pixblt	xy,xy
	jruc	loop_end,l

testfor4:
	dec	a2
	jrnz	testfor5

; op 4 - mirror the font and precompute some values.
;
; Takes the font description at font_adr/font_size/first_char/last_char/
; bold_smear and
;   1. derives font_pitch (8 for widths up to 8, otherwise 16), font_lmo
;      (LMO of that pitch, as CONVSP requires) and font_area (bits per
;      glyph = height * pitch);
;   2. bit-reverses every 16-bit word of the font in place;
;   3. writes a bold copy $10000 above the font, made by OR-ing each word
;      with itself shifted left one bit, bold_smear times;
;   4. advances font_adr by (pitch - width) for odd-sized fonts.
; Uses a5-a12; a0 and a1 are left alone for loop_end.
; NOTE(review): steps 2 and 4 modify the font and font_adr in place, so the
; command is not repeatable on the same font data; bold_smear and the word
; count must be non-zero because both loops are DSJ loops.

	move	@font_size,a5,0		; a5 = glyph width
	movk	8,a6
	cmp	a6,a5
	jrle	t4b8
	movi	16, a6
t4b8:	move	a6,@font_pitch,0
	lmo	a6,a5			; conversion factor of the pitch, not
	move	a5,@font_lmo,0		; of the width (a5 is reloaded below)
	move	@font_size+$10,a6,0
	move	@font_pitch,a5,0
	mpyu	a6,a5
	move	a5,@font_area,0

	move	@last_char,a6,0
	move	@first_char,a5,0
	sub	a5,a6
	addk	1,a6
	move	@font_size+$10,a5,0
	mpyu	a6,a5			; a5 = total number of glyph rows
	move	@font_size,a7,0
; Pick the word count and the smear mask by glyph width.  The mask loads
; are placed so that nothing comes between cmpi and jrgt, because an
; immediate move rewrites the status bits.
	move	$7fff,a12	; wide font: one row per word
	cmpi	8,a7
	jrgt	t4bf		; wider than 8 pixels?
	addk	1,a5		; no, the words are only half the # of rows
	srl	1,a5
	move	$7f7f,a12	; two rows per word: keep the smear inside
				; each byte
t4bf:	move	@font_adr,a6,1
	move	a6,a9
	addi	$10000,a9 ; start address of bold font
	move	@bold_smear,a10

; fortunately, this loop fits into 3 of the 4 cache segments:
; execution time: about 32 periods per word of font.
; Each srl/addc pair moves the lowest bit of a7 into the bottom of a8;
; sixteen pairs reverse the whole word.

mirlp:	move	*a6,a7
	clr	a8

	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8

	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8

	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8

	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8

	move	a8,*a6+		; store the mirrored word back in place
	move	a8,a7
	move	a10,a11
smearlp:
	and	a12,a7
	sll	1,a7
	or	a7,a8
	dsj	a11,smearlp
	move	a8,*a9+		; bold version of the same word

	dsj	a5,mirlp
;; support odd-sized fonts. pitch must still be 8 or 16
	move	@font_size,a5,0
	move	@font_pitch,a6,0
	sub	a5,a6
	move	@font_adr,a5,1
	add	a5,a6
	move	a6,@font_adr,1
;;
	jruc	loop_end,l


testfor5:
	dec	a2
	jrne	testfor6
; loadclut --- load clut entry.
;	1==overlay index red green blue
;	for speed reasons, the host will load the image clut directly rather
;	than through us, but it's not that expensive to support both here
;	just in case
; Command words after the header, in order: overlay flag, index, red,
; green, blue.  A non-zero flag sends the three components to $fe800030,
; a zero flag to $fe800010; the index always goes to $fe800000.
	move	a0,a4
	addk	$10,a4		; skip the header word
	move	$fe800030,a6
	move	*a4+,a5,0	; the load sets Z for the branch below
	jrne	t5l1
	subk	$20,a6
t5l1:	move	*a4+,a5,0
	move	a5,@$fe800000,0
	move	*a4+,a5,0
	move	a5,*a6,0
	move	*a4+,a5,0
	move	a5,*a6,0
	move	*a4+,a5,0
	move	a5,*a6,0
	jruc	loop_end,l

testfor6:
	dec	a2
	jrne	testfor7

; op 6: load new framebuffer size and position for ite support.
; Command words after the header, in order: width/height (32 bits),
; screen origin (32 bits), pitch (16 bits), pixel size (16 bits).
; Each value is copied both into its variable and into the register or
; I/O location the drawing code uses (B7, B4, B3, CONVDP, PSIZE), the same
; way the initialization code sets them up.
	move	a0,b10
	addk	$10,b10		; skip the header word
	move	*b10+,b7,1
	move	b7,@screen_width,1	; 32-bit store also covers screen_height
	move	*b10+,b4,1
	move	b4,@screen_origin,1
	move	*b10+,b3,0
	move	b3,@screen_pitch,0
	lmo	b3,b0
	move	b0,@convdp,0
	move	*b10,b0,0
	move	b0,@psize,0
	move	b0,@pixel_size,0	; this syncs the psize write, too

	jruc	loop_end,l

testfor7:
; Opcodes 7 and above are not implemented: the entry is skipped.
	jruc	loop_end,l
;;;
