1; Displays entire 400+ color NTSC NES palette on screen.
2; Disables PPU rendering so that current scanline color can be
3; set directly by VRAM address, then uses cycle-timed code to
4; cycle through all colors in a clean grid.
5;
6; ca65 -o full_palette.o full_palette.s
7; ld65 -t nes full_palette.o -o full_palette.nes
8;
9; Shay Green <gblargg@gmail.com>
10
; ROM layout. The header bytes follow the iNES convention: the
; "NES",$1A signature, then 2 (PRG-ROM size byte), 1 (CHR-ROM size
; byte) and two zero flag bytes. Only 8 bytes are emitted here;
; NOTE(review): the rest of the 16-byte header is presumably
; zero-filled by the linker configuration -- confirm.
11.segment "HEADER"
12	.byte "NES",26, 2,1, 0,0	; 26 = $1A
13
; Three zero words of padding, then the addresses of the nmi, reset
; and irq labels. NOTE(review): assumes the linker config places this
; segment so that the last three words land on the CPU vectors at
; $FFFA-$FFFF -- confirm against the ld65 "nes" target config.
14.segment "VECTORS"
15	.word 0,0,0, nmi, reset, irq
16
; Reserves 8192 bytes (8 KB) for the CHARS segment; no tile data is
; defined in this file.
17.segment "CHARS"
18	.res 8192
19
20.segment "STARTUP" ; avoids warning
21
22.segment "CODE"
23
; Frame-parity counter. Cleared once after the VBL sync loop and
; incremented at the end of every frame; only bit 0 is ever tested.
24even_frame = $200
25
; Shared interrupt handler: the irq and nmi labels both resolve to the
; same single RTI, so either interrupt returns immediately without
; doing any work.
26irq:
27nmi:	rti
28
; wait_vbl: spin until bit 7 of $2002 reads as set, then return.
; Preserves A, X and Y; only the flags change (BIT sets N, V and Z).
; NOTE(review): on NES hardware reading $2002 clears the VBL flag, so
; the first read presumably discards a stale flag -- confirm against
; PPU documentation.
29wait_vbl:
30	bit $2002	; result ignored: flags are overwritten by the next BIT
31:	bit $2002	; N = bit 7 of $2002
32	bpl :-		; keep polling while bit 7 is clear
33	rts
34
; blacken_palette: write $0F to 32 consecutive VRAM bytes starting at
; address $3FE0.
; Clobbers A and flags; Y = 0 on return; X is untouched.
; This routine is called from the cycle-counted frame loop, so its
; length is part of the frame timing: by my count 309 cycles from
; entry through RTS (assuming the loop branch does not cross a page).
35blacken_palette:
36	; Fill palette with black. Starts at $3FE0 so that VRAM
37	; address will wrap around to 0 afterwards, so that BG
38	; rendering will work correctly.
39	lda #$3F
40	sta $2006	; VRAM address, high byte first
41	lda #$E0
42	sta $2006	; VRAM address, low byte
43	lda #$0F	; value stored to every entry ("black" per the comment above)
44	ldy #$20	; 32 writes
45:	sta $2007	; store one byte through the PPU data port
46	dey
47	bne :-
48	rts
49
; reset: entry point listed in the VECTORS table.
; Sets up the stack, zeroes $2000/$2001, blackens the palette, clears
; 1024 bytes of VRAM at $2000, then phase-locks the CPU to the start of
; VBL and clears even_frame. Does not return: execution falls through
; into begin_frame.
50reset:
51	sei		; set the interrupt-disable flag
52	ldx #$FF
53	txs		; stack pointer = $FF
54
55	; Init PPU
56	jsr wait_vbl	; two VBL waits before touching the PPU
57	jsr wait_vbl	; (presumably the customary PPU warm-up delay)
58	lda #0
59	sta $2000	; zero both PPU control registers
60	sta $2001
61	jsr blacken_palette
62
63	; Clear nametable
64	lda #$20
65	sta $2006	; VRAM address = $2000, high byte first
66	lda #$00
67	sta $2006	; A is left at 0 and doubles as the fill value
68	ldx #4		; 4 outer passes ...
69	ldy #0		; ... of 256 writes each = 1024 bytes
70:	sta $2007
71	iny
72	bne :-		; inner loop ends when Y wraps back to 0
73	dex
74	bne :-		; nearest preceding anonymous label: the same STA
75
76	; Synchronize precisely to VBL. VBL occurs every 29780.67
77	; CPU clocks. Loop takes 27 clocks. Every 1103 iterations,
78	; the second LDA $2002 will read exactly 29781 clocks
79	; after a previous read. Thus, the loop will effectively
80	; read $2002 one PPU clock later each frame. It starts out
81	; with VBL beginning sometime after this read, so that
82	; eventually VBL will begin just before the $2002 read,
83	; and thus leave CPU exactly synchronized to VBL.
84	jsr wait_vbl
85	nop		; one-time 2-cycle offset before entering the loop
86:	nop		; 2 cycles
87	lda $2002	; 4 cycles
88	lda $2002	; 4 cycles; this is the value tested below
89	pha		; 3 cycles  \
90	pla		; 4 cycles   | PHA/PLA pairs are padding; the final
91	pha		; 3 cycles   | PLA reloads the pushed value, so N
92	pla		; 4 cycles  /  still reflects bit 7 of the last read
93	bpl :-		; 3 cycles when taken -> 27 per iteration
94
95	lda #0
96	sta even_frame	; start the frame-parity counter at 0
97
; begin_frame: top of the per-frame loop, reached by fall-through from
; reset and by the JMP at the end of each frame.
; Everything from here to that JMP is cycle-counted; do not insert or
; remove instructions without re-balancing the delay loops.
98begin_frame:
99	jsr blacken_palette
100
101	; Enable BG so that PPU will make every other frame
102	; shorter by one PPU clock. This allows our code to
103	; synchronize better and reduce horizontal shaking.
104	lda #$08
105	sta $2001
106
107	; Delay 4739 cycles, well into frame
108	ldx #4		; 4 outer passes
109	ldy #176	; first inner pass runs 176 times, the other three 256 (Y wraps from 0)
110:	dey
111	bne :-
112	dex
113	bne :-		; nearest preceding anonymous label: the DEY
114
115	nop		; 2-cycle fine adjustment
116
117	; Disable BG. Now electron beam color can be set by
118	; VRAM address pointing into palette.
119	lda #0
120	sta $2001
121
122	; Draw palette
123	ldy #0		; Y = color
; triplet: emits three draw_row expansions per pass. Y advances by 4
; per pass (3 + 1 INYs), so the loop runs 64 times before Y wraps to 0
; and the BEQ exits. By my count one pass is 114 + 114 + 113 = 341 CPU
; cycles (assuming no page-crossing branch penalties).
124triplet:
125
126; Draws one scanline of palette. Takes 106 cycles.
; Uses Y as input (left unchanged); clobbers A, X and flags.
; Sets the VRAM address to $3F3F, then writes 14 consecutive values,
; starting at (Y AND $18) * 2, to $2007.
127.macro draw_row
128	nop		; 3 x NOP = 6 cycles of padding
129	nop
130	nop
131	tya
132	and #$18	; keep bits 3-4 of Y ...
133	asl a		; ... shifted left once: $00, $10, $20 or $30
134	ldx #$3F
135	stx $2006	; VRAM address high byte = $3F
136	stx $2006	; VRAM address low byte  = $3F
137	tax		; X = first value to write
138	stx $2007
139	inx
140	stx $2007
141	inx
142	stx $2007
143	inx
144	stx $2007
145	inx
146	stx $2007
147	inx
148	stx $2007
149	inx
150	stx $2007
151	inx
152	stx $2007
153	inx
154	stx $2007
155	inx
156	stx $2007
157	inx
158	stx $2007
159	inx
160	stx $2007
161	inx
162	stx $2007
163	inx
164	stx $2007	; 14th and last write
165.endmacro
166
167	draw_row
168
169	; Palette writes are delayed a line, since VRAM address
170	; increments just after $2007 write. So we don't set
171	; color tint until after first row of triplet
172	tya
173	and #$E0	; top three bits of Y only; the low five bits written are 0
174	sta $2001
175
176	draw_row
177
178	iny		; Y += 3 here; bits 3-4 are unaffected because Y
179	iny		; is a multiple of 4 at the top of each pass, so
180	iny		; the third row uses the same row base
181	nop		; 2-cycle padding
182
183	draw_row
184
185	iny		; Y += 1 -> net +4 per pass
186	beq :+		; loop is more than 128 bytes, argh
187	jmp triplet
188:
189
190	nop		; 2-cycle padding
191
192	; Delay 2869 cycles
193	ldy #239	; 239 iterations of a 12-cycle loop (last BNE is 1 shorter)
194:	pha
195	pla
196	dey
197	bne :-
198
199	; Delay extra cycle every other frame
200	inc even_frame
201	lda even_frame
202	lsr a		; carry = bit 0 of even_frame
203	bcs :+		; taken branch costs one more cycle than not-taken;
204:	jmp begin_frame	; both paths arrive here. By my count a full frame pass is 29780 or 29781 cycles -- TODO confirm in an emulator
205