xref: /netbsd/sys/arch/amiga/dev/grf_ultms.g (revision 6550d01e)
1; $NetBSD: grf_ultms.g,v 1.10 2009/11/09 15:35:27 is Exp $
2;
3; ite support for A2410.
4
5;
6; Copyright (c) 1995 Ignatios Souvatzis.
7; All rights reserved.
8;
9; Redistribution and use in source and binary forms, with or without
10; modification, are permitted provided that the following conditions
11; are met:
12; 1. Redistributions of source code must retain the above copyright
13;    notice, this list of conditions and the following disclaimer.
14; 2. Redistributions in binary form must reproduce the above copyright
15;    notice, this list of conditions and the following disclaimer in the
16;    documentation and/or other materials provided with the distribution.
17;
18; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29; This file contains the source code for grf_ultms.c. It is assembler
30; code for the TMS34010 CPU/graphics processor, as understood by the
31; in-tree version of Paul Mackerras' "gspa" assembler.
32;
33; Use 'make grf_ultms.c' to generate the .c file.
34
35; memory map:
36; FF800000 .. FF9FFFFF	overlay planes
37; FFA00000 .. FFA0FFFF	ite support code
38; FFA10000 .. FFA1FFFF	ite support, input queue
39; FFA20000 .. FFA2FEFF	variables
40; FFA2FF00 .. FFA2FFFF	variables, X server
41; FFA30000 .. FFA3FFFF	font data
42; FFA40000 .. FFA4FFFF	font data, bold
43; FFA50000 .. FFA5FFFF	X server, input queue
44; FFA60000 .. FFFFC000	X server, onboard pixmaps
45
46; Start of data area
; The label d marks the base of this area; the later .org directives are
; written relative to it (d+0x100, d+0x200).
47	.org	$FFA20000
48d:
49
50;
51; Ring buffer for getting stuff from host
52; Data buffer:
53inbuf	=	$FFA10000	; 64kbits here (8k bytes)
54;
55; Pointers: (these must be at address $FFA20000)
; Both pointers start out equal to inbuf.  The code in this file reads put
; but never writes it; get is loaded once at startup and stored back after
; every processed command.  Presumably the host advances put - confirm
; against the host-side driver.
56put:		.long	inbuf
57get:		.long	inbuf
58
59;
60; Mode bits for communication between GSP and CPU
61;
62; GSP mode bits: set by CPU, control GSP operation
; The values are bit numbers within gsp_mode (GSP_CALL_X is used as the
; bit operand of a btst in the main loop).  GSP_CALL_X is the only one of
; these that the code in this file tests.
63GSP_HOLD =	0
64GSP_FLUSH =	1
65GSP_ALT_SCRN =	2
66GSP_DISP_CTRL =	3
67GSP_NO_CURSOR =	4
68GSP_CALL_X =	5
69gsp_mode:	.word	0
70
71;
72; Pointer to X operation routine
; The main loop loads this 32-bit value and calls it on every pass while
; the GSP_CALL_X bit of gsp_mode is set.  It is initialised to 0 here and
; never written by this file.
73xproc:		.long	0
74
75; We leave the next few words for future communication requirements
76
77		.org	d+0x100
78;
79; Other data:
; magic and MAGIC are not referenced by the code in this file.
80magic:		.blkl	1		; set => screen already inited
81MAGIC =		0xD0D0BEAC
82
; screen_width and screen_height are adjacent words; the code loads and
; stores them together as a single 32-bit value addressed at screen_width,
; so their order and adjacency must be kept.
; All five values below are overwritten by op 6.
83screen_width:	.word	1024
84screen_height:	.word	768
85screen_origin:	.long	$FE000000	; just a placeholder
86screen_pitch:	.word	8192		; 1024*8
87pixel_size:	.word	8
88
89		.org	d+0x200
90font_adr:
91;
92; Font information is stored in the structure declared below.
; font_adr and bitmap_ptrs label the same address: the code reads and
; updates the bitmap pointer through the name font_adr.
93;
94bitmap_ptrs:	.long	$FFA30000	; points to first bitmap
; font_size is read either as one 32-bit Y:X value or as two words:
; @font_size is the X size (width), @font_size+$10 the Y size (height).
95font_size:	.long	$00080008	; Y:X bitmap size
96under_row:	.word	6		; row # for underlines
97under_ht:	.word	1		; thickness of underline
98first_char:	.word	32		; first and last char in font
99last_char:	.word	255		;
100bold_smear:	.word	1		; for making bold fonts
101
; bgcolor is loaded into b8 during initialization; fgcolor is not
; referenced by the code in this file.
102bgcolor:	.long	0		; background color
103fgcolor:	.long	$01010101	; foreground color
104;precomputed out of what the host gave us:
; (op 4 recomputes the three values below from font_size; the initial
; values correspond to the default 8x8 font_size above)
105font_area:	.word	64		; in pixels
106font_pitch:	.word	8
107font_lmo:	.word	28
108
109
110; Control register addresses
; hesync and dpyctl are defined here but not referenced by the code in
; this file.
111hesync	=	$c0000000
112dpyctl	=	$c0000080
113control	=	$c00000b0
114convsp	=	$c0000130
115convdp	=	$c0000140
116psize	=	$c0000150
117
118;
119; Bits in control register
; The five masks below OR together to $7FE0, the literal that the
; initialization code clears in the control register.  PPOP covers bits
; 10..14, which is the 5-bit field op 2 writes at control+10.
120T	=	$20		; enable transparency
121W	=	$C0		; window options
122PBH	=	$100		; pixblt horiz dirn
123PBV	=	$200		; pixblt vertical dirn
124PPOP	=	$7C00		; pixel processing options
125
126;
127; Bits in dpyctl register
; (SRT is not referenced by the code in this file)
128SRT	=	$800		; do serial register transfers
129
; free_memory holds the address of the first location after itself; it
; is not referenced by the code in this file.
130free_memory:	.long	free_memory_start
131free_memory_start:		; allocate dynamic arrays from here
132
133;
134; Program starts here.
135	.org	$FFA00000
136	.start	.
137
138;
139; initialization
140;
; Field 0 is set to 16 bits and field 1 to 32 bits.  The trailing ,0 / ,1
; on the moves throughout this file selects between them, which is why
; .word variables are accessed with ,0 and .long variables with ,1.
141	setf	16,0,0		; just in case
142	setf	32,0,1
143	move	$fffff000,sp
144
145; Set up sync, blank parameters
146; done by host through interface
147
148; set up overlay clut:
; Writes 0 to $fe800000, then twelve values to $fe800030: three times 128
; followed by nine times 0.  Op 5 uses the same two addresses as "index"
; and "red green blue", so this presumably sets entry 0 to (128,128,128)
; and entries 1..3 to black, relying on the index advancing after every
; third write - TODO confirm against the RAMDAC documentation.
149	move	$0,a0
150	move	a0,@$fe800000
151	move	$fe800030,a1
152	move	128,a0
153	move	a0,*a1
154	move	a0,*a1
155	move	a0,*a1
156	move	0,a0
157	move	a0,*a1
158	move	a0,*a1
159	move	a0,*a1
160	move	a0,*a1
161	move	a0,*a1
162	move	a0,*a1
163	move	a0,*a1
164	move	a0,*a1
165	move	a0,*a1
166
167; set up overlay planes:
; Writes 6 to $fe800000 and then $0b to $fe800020.  Op 1 repeats this
; sequence with the value 1.  The meaning of the values is not visible in
; this file (presumably RAMDAC register 6 - TODO confirm).
168	move	6,a0
169	move	a0,@$fe800000
170	move	$0b,a0
171	move	a0,@$fe800020
172
173; set up global registers
; b3 = screen pitch, b4 = screen origin, b8 = background colour; these
; are kept live across commands (op 3 copies b3, op 6 reloads b3/b4).
174	move	@screen_pitch,b3,0
175	move	@screen_origin,b4,1
176	move	@bgcolor,b8,1
177	lmo	b3,b0
178	move	b0,@convdp,0	; convdp is derived from the screen pitch
179	move	@control,a0,0
180	andn	$7FE0,a0		; clear PPOP, PBV, PBH, W, T fields
181	move	a0,@control,0
182	move	@pixel_size,a0,0
183	move	a0,@psize,0
184	move	@psize,a0,0	; read back; presumably to sync the write
185
186
187; clear the entire screen
; Linear fill starting at the screen origin with colour 0.  The 32-bit
; load at screen_width picks up screen_width and screen_height together.
188	move	b4,b2
189	move	0,b9
190	move	@screen_width,b7,1
191	fill	l
192
1934:
194; main stuff...
;
; Command dispatcher.  Register roles while a command is being handled:
;   a0 = current read pointer into the ring buffer (mirrors get)
;   a1 = header word of the current command with the low 4 bits cleared;
;        loop_end adds it to a0, so it is the command size in bits
;   a2 = copy of the header word; its low 4 bits are the opcode
;   a3 = put (then put - a0)
; Command handlers must therefore leave a0 and a1 intact.
195	move	@get,a0,1
196	jruc	main_loop
; loop_end: every handler finishes here.  The header word of the command
; just processed is zeroed, the read pointer is advanced by the command
; size and published in get.  addxy adds the two 16-bit halves separately,
; so the low half presumably wraps without carrying into the upper half,
; keeping the pointer inside the 64kbit buffer - TODO confirm.
197loop_end:
198	clr	a4
199	move	a4,*a0,0
200	addxy	a1,a0
201	move	a0,@get,1
202main_loop:
203	move	@gsp_mode,a1,0
204	btst	GSP_CALL_X,a1
205	jreq	main_loop_1	; bit clear: no X routine to call
206
; GSP_CALL_X is set: call the routine whose address is in xproc,
; preserving a0-a3 around the call.  Nothing in this file clears the bit,
; so the call is repeated on every pass for as long as it stays set.
207	mmtm	sp,a0,a1,a2,a3
208	move	@xproc,a4,1
209	call	a4
210	mmfm	sp,a0,a1,a2,a3
211
212main_loop_1:
213	move	@put,a3,1
214	move	*a0,a1,0	; header word at the read pointer
215
216	move	a1,a2
217	andi	$FFF0,a1	; size part of the header
218	jrz	main_loop	; size 0: no command here yet, poll again
219
220	sub	a0,a3
221	jreq	main_loop	; put == read pointer: buffer is empty
222continue:
; Opcode dispatch: a2 is reduced to the low 4 bits of the header word and
; then decremented once per candidate opcode.  Opcode 0 is skipped.
223	andi	$F,a2
224	jrz	loop_end
225	dec	a2
226	jrnz	testfor2
227; op 1 - char
;
; Draws one character.  Words consumed from the command, in order:
;   header, char code, colour -> b8, colour -> b9, y:x (32 bit), flags
; Flag bits: 0 = swap the two colours, 1 = underline, 2 = bold.
; Uses b0-b2, b7-b12, a4 and a5; a0/a1 are left alone for loop_end.
228	movk	6,b10
229	move	b10,@$fe800000,0
230	movk	1,b10
231	move	b10,@$fe800020,0
232
233	move	a0,b10
234	move	*b10+,b12,0	; dummy move (faster than addk)
235	move	*b10+,b12,0	; char code
236	move	@first_char,b11,0
237	sub	b11,b12		; minus first char in font
238	move	@font_size,b7,1	;dydx - char size->pixel array dimensions
239	move	@font_pitch,b1
240	move	@font_lmo,b0
241	move	b0,@convsp,0
242	move	@font_area,b11
243
244	mpyu	b12,b11		; times char offset
245	move	@font_adr,b0,1	; font bitmaps base
246	add	b11,b0		; character bitmap start addr. linear
247
; NOTE(review): the initialization code loads bgcolor into b8, while the
; first colour word is labelled "fg" below.  Check against the host side
; which colour is really sent first before relying on these two labels.
248	move	*b10+,b8,0	; fg
249	move	*b10+,b9,0	; bg
250	move	*b10+,b2,1	; y:x
251
252	move	*b10+,b11,0	; flags
253	move	b11,a4
254	btst	0,a4
255	jreq	noinv
; flag bit 0 set: exchange b8 and b9
256	move	b8,b11
257	move	b9,b8
258	move	b11,b9
259noinv:
260	btst	2,a4
261	jreq	nobold
; flag bit 2 set: take the glyph from the bold copy, which op 4 writes
; $10000 bits above the normal font
262	addi	$10000,b0
263nobold:
; b2 is saved around the pixblt and restored afterwards, presumably
; because the pixblt modifies it; the underline below needs the original.
264	move	b2,a5
265	pixblt	b,xy
266	move	a5,b2
267
268	btst	1,a4
269	jreq	noul
; flag bit 1 set: fill a bar under_ht rows high, under_row rows below the
; character origin; the X extent left in b7 is kept.
270	move	@under_row,b11,0
271	sll	16,b11		; shift into Y half
272	add	b11,b2
273	move	@under_ht,b11,0
274	sll	16,b11		; shift into Y half
275	movy	b11,b7		; and move Y half only
276	fill	xy
277noul:
278	jruc	loop_end
279testfor2:
280	dec	a2
281	jrnz	testfor3
282; op 2 - fill
;
; Fills a rectangle.  Words consumed from the command, in order:
;   header, colour, XY start (32 bit), dydx (32 bit), processing option
; The processing option is written as a 5-bit field at control+10, i.e.
; the bits covered by the PPOP mask.  The previous control value is saved
; on the stack and restored after the fill.
283	move	a0,b10
284	move	*b10+,b9,0	; dummy move
285	move	*b10+,b9,0	; color
286	move	*b10+,b2,1	; XY start address
287	move	*b10+,b7,1	; dydx
288
289	move	@control,b0,0
290	move	b0,*-sp		; save the current control value
291	move	*b10+,b0
292	setf	5,0,0		; field 0 temporarily 5 bits wide
293	move	b0,@control+10
294	setf	16,0,0		; back to the 16 bits set up at initialization
295	move	@control,b0,0	; read back; presumably to sync the write
296
297	fill	xy
298
299	move	*sp+,b0
300	move	b0,@control,0	; restore the saved control value
301	jruc	loop_end,l
302
303testfor3:
304	dec	a2
305	jrnz	testfor4
306; op 3 - pixblt
;
; Copies a screen rectangle to another screen position.  Words consumed
; from the command, in order:
;   header, XY source (32 bit), dydx (32 bit), XY destination (32 bit)
; Source and destination are both the screen, so convsp is copied from
; convdp and b1 from b3.  The PBH/PBV direction bits in control are
; recomputed from a comparison of the source and destination positions
; and are left as set afterwards (control is not restored).
307	move	a0,b10
308	move	@convdp,@convsp,0
309	move	*b10+,b0,0	; dummy move
310	move	*b10+,b0,1	; XY src
311	move	*b10+,b7,1	; dydx
312	move	*b10+,b2,1	; XY dst
313	move	b3,b1
314	move	@control,b11,0
315	andni	PBH|PBV,b11
; The two conditional branches below both test flags produced by this one
; cmpxy; the ori in between must therefore leave the overflow flag alone.
316	cmpxy	b0,b2
317	jrc	yok
318	ori	PBV,b11
319yok:	jrv	xok
320	ori	PBH,b11
321xok:	move	b11,@control,0
322	move	@control,b11,0	; read back; presumably to sync the write
323
324	pixblt	xy,xy
325	jruc	loop_end,l
326
327testfor4:
328	dec	a2
329	jrnz	testfor5
330
331; op 4 - mirror the font and precompute some values.
;
; Inputs (font structure): font_size, first_char, last_char, bold_smear,
;   font_adr.
; Outputs: font_pitch, font_lmo, font_area; every 16-bit word of the font
;   bit-reversed in place; a smeared (bold) copy written $10000 bits above
;   the font; font_adr advanced by pitch - width.
; Uses a5-a12; a0/a1 are left alone for loop_end.
; Rows are 8 bits wide for fonts up to 8 pixels wide (two rows per 16-bit
; word) and 16 bits wide otherwise (one row per word).
; bold_smear is used as a dsj loop count and is presumably expected to be
; at least 1 - TODO confirm.
332
333	move	@font_size,a5,0	; a5 = glyph width (X half of font_size)
334	movk	8,a6
335	cmp	a6,a5
336	jrle	t4b8		; width <= 8: pitch is 8
337	movi	16, a6		; otherwise pitch is 16
338t4b8:	move	a6,@font_pitch,0
; font_lmo is loaded into convsp next to font_pitch, so it is derived from
; the pitch rather than from the glyph width; the two only differ for
; odd-sized fonts (e.g. width 12 with pitch 16).
339	lmo	a6,a5
340	move	a5,@font_lmo,0
341	move	@font_size+$10,a6,0	; glyph height (Y half of font_size)
342	move	@font_pitch,a5,0
343	mpyu	a6,a5
344	move	a5,@font_area,0	; bits per glyph = pitch * height
345
; a5 = number of 16-bit words to process
346	move	@last_char,a6,0
347	move	@first_char,a5,0
348	sub	a5,a6
349	addk	1,a6		; number of glyphs
350	move	@font_size+$10,a5,0
351	mpyu	a6,a5		; total number of rows
; The smear mask is loaded before the compare so that nothing sits
; between cmpi and jrgt: an immediate move is documented to rewrite the
; N/Z/V flags, which would make the branch test the mask constant instead
; of the font width.
352	move	$7fff,a12	; bold smear mask for one 16-bit row per word
353	move	@font_size,a7,0
354	cmpi	8,a7
355	jrgt	t4bf		; wider than 8 pixels: one word per row
356	addk	1,a5		; no: two 8-bit rows share each word, so halve
357	srl	1,a5		; the word count, rounding up
358	move	$7f7f,a12	; and keep the smear from crossing into the other row
359t4bf:	move	@font_adr,a6,1
360	move	a6,a9
361	addi	$10000,a9 ; start address of bold font
362	move	@bold_smear,a10
363
364; fortunately, this loop fits into 3 of the 4 cache segments:
365; execution time: about 32 periods per word of font.
366
; Each srl/addc pair shifts the lowest bit of a7 out and into a8 from the
; right; sixteen pairs leave the 16-bit word bit-reversed in a8.
367mirlp:	move	*a6,a7
368	clr	a8
369
370	srl	1,a7
371	addc	a8,a8
372	srl	1,a7
373	addc	a8,a8
374	srl	1,a7
375	addc	a8,a8
376	srl	1,a7
377	addc	a8,a8
378
379	srl	1,a7
380	addc	a8,a8
381	srl	1,a7
382	addc	a8,a8
383	srl	1,a7
384	addc	a8,a8
385	srl	1,a7
386	addc	a8,a8
387
388	srl	1,a7
389	addc	a8,a8
390	srl	1,a7
391	addc	a8,a8
392	srl	1,a7
393	addc	a8,a8
394	srl	1,a7
395	addc	a8,a8
396
397	srl	1,a7
398	addc	a8,a8
399	srl	1,a7
400	addc	a8,a8
401	srl	1,a7
402	addc	a8,a8
403	srl	1,a7
404	addc	a8,a8
405
406	move	a8,*a6+		; mirrored word back into the normal font
407	move	a8,a7
408	move	a10,a11		; a11 = bold_smear loop count
; OR the word with itself shifted by one bit, bold_smear times; the mask
; drops the last bit of each row before the shift.
409smearlp:
410	and	a12,a7
411	sll	1,a7
412	or	a7,a8
413	dsj	a11,smearlp
414	move	a8,*a9+		; smeared word into the bold font
415
416	dsj	a5,mirlp
417;; support odd-sized fonts. pitch must still be 8 or 16
; font_adr += pitch - width
418	move	@font_size,a5,0
419	move	@font_pitch,a6,0
420	sub	a5,a6
421	move	@font_adr,a5,1
422	add	a5,a6
423	move	a6,@font_adr,1
424;;
425	jruc	loop_end,l
426
427
428testfor5:
429	dec	a2
430	jrne	testfor6
431; loadclut --- load clut entry.
432;	1==overlay index red green blue
433;	for speed reasons, the host will load the image clut directly rather
434;	than through us, but it's not that expensive to support both here
435;	just in case
;
; Words consumed from the command, in order:
;   header, overlay flag, index, red, green, blue
; The index is written to $fe800000 and the three colour components to a
; data port: $fe800030 when the overlay flag is non-zero, otherwise
; $fe800030 - $20.  Uses a4-a6.
436	move	a0,a4
437	addk	$10,a4		; skip the header word
438	move	$fe800030,a6
439	move	*a4+,a5,0	; overlay flag
440	jrne	t5l1		; non-zero: keep the overlay data port
441	subk	$20,a6
442t5l1:	move	*a4+,a5,0	; index
443	move	a5,@$fe800000,0
444	move	*a4+,a5,0	; red
445	move	a5,*a6,0
446	move	*a4+,a5,0	; green
447	move	a5,*a6,0
448	move	*a4+,a5,0	; blue
449	move	a5,*a6,0
450	jruc	loop_end,l
451
452testfor6:
453	dec	a2
454	jrne	testfor7
455
456; op 6: load new framebuffer size and position for ite support.
;
; Words consumed from the command, in order:
;   header, width and height (one 32-bit value), origin (32 bit),
;   pitch (16 bit), pixel size (16 bit)
; Each value is stored in its variable and, where the initialization code
; set up a register from that variable (b3, b4, convdp, psize), in that
; register as well.
457	move	a0,b10
458	addk	$10,b10		; skip the header word
459	move	*b10+,b7,1
460	move	b7,@screen_width,1	; also covers screen_height
461	move	*b10+,b4,1
462	move	b4,@screen_origin,1
463	move	*b10+,b3,0
464	move	b3,@screen_pitch,0
465	lmo	b3,b0
466	move	b0,@convdp,0	; convdp follows the new pitch
467	move	*b10,b0,0
468	move	b0,@psize,0
469	move	b0,@pixel_size,0	; this syncs the psize write, too
470
471	jruc	loop_end,l
472
473testfor7:
; Opcodes 7 and above are not implemented: the command is skipped.
474	jruc	loop_end,l
475;;;
476