1; Copyright (c) 2020 Valve Corporation
2;
3; Permission is hereby granted, free of charge, to any person obtaining a
4; copy of this software and associated documentation files (the "Software"),
5; to deal in the Software without restriction, including without limitation
6; the rights to use, copy, modify, merge, publish, distribute, sublicense,
7; and/or sell copies of the Software, and to permit persons to whom the
8; Software is furnished to do so, subject to the following conditions:
9;
10; The above copyright notice and this permission notice (including the next
11; paragraph) shall be included in all copies or substantial portions of the
12; Software.
13;
14; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20; SOFTWARE.
21;
22;
23; This file is the source for a simple mock firmware used to regression test
24; the afuc assembler/disassembler. Note, it won't actually work if you try to
25; load it on the GPU! First this is assembled, compared to the reference
26; binary, then disassembled and compared to the reference disassembly. We do
27; this to avoid having to host the actual firmware, especially the disassembled
28; version, in Mesa.
29[01000001] ; first of two bracketed raw words placed ahead of the first instruction (meaning not documented in this file)
30[01000000] ; second raw word; with two words in front, loc02 is expected to resolve to 0x0002 (see CP_ME_INIT)
31loc02:
32; packet table loading: find the SQE firmware in memory and copy its packet table
33mov $01, 0x0830 ; CP_SQE_INSTR_BASE
34mov $02, 0x0002 ; $02 = 2, written to @REG_READ_DWORDS below (two $regdata reads follow)
35cwrite $01, [$00 + @REG_READ_ADDR], 0x0
36cwrite $02, [$00 + @REG_READ_DWORDS], 0x0
37; move hi/lo of SQE fw addrs to registers:
38mov $01, $regdata ; first value read back; treated as the low half (it gets `add` below)
39mov $02, $regdata ; second value read back; treated as the high half (it gets `addhi` below)
40; skip first dword
41add $01, $01, 0x0004
42addhi $02, $02, 0x0000 ; adds 0 to the high half; presumably folds in the carry from the add above - confirm against afuc docs
43mov $03, 0x0001 ; $03 = 1, written to @MEM_READ_DWORDS below
44cwrite $01, [$00 + @MEM_READ_ADDR], 0x0
45cwrite $02, [$00 + @MEM_READ_ADDR+0x1], 0x0
46cwrite $03, [$00 + @MEM_READ_DWORDS], 0x0
47; read 2nd dword of fw, and add offset (minus 4 because we skipped first dword)
48; to base address of sqe fw
49rot $04, $memdata, 0x0008 ; $04 = the dword read from memory, rotated by 8
50ushr $04, $04, 0x0006 ; then shifted right by 6 to form the offset
51sub $04, $04, 0x0004 ; minus 4, compensating for the skipped first dword
52add $01, $01, $04
53addhi $02, $02, 0x0000 ; same add/addhi pairing as above for the high half
54
55; load packet table:
56mov $rem, 0x0080 ; 0x80 = 128, matching the 128 opcode labels (0..127) this file defines; presumably one table entry each
57cwrite $01, [$00 + @MEM_READ_ADDR], 0x0
58cwrite $02, [$00 + @MEM_READ_ADDR+0x1], 0x0
59cwrite $02, [$00 + @LOAD_STORE_HI], 0x0 ; the high half is also written to @LOAD_STORE_HI
60cwrite $rem, [$00 + @MEM_READ_DWORDS], 0x0
61cwrite $00, [$00 + @PACKET_TABLE_WRITE_ADDR], 0x0 ; writes $00 as the packet table write address
62(rep)cwrite $memdata, [$00 + @PACKET_TABLE_WRITE], 0x0 ; feeds the read data into the packet table; (rep) presumably repeats per $rem - confirm
63
64mov $02, 0x883 ; value written to @REG_WRITE_ADDR below
65mov $03, 0xbeef ; low 16 bits of the test pattern
66mov $04, 0xdead << 16 ; high 16 bits, using the shifted-immediate form
67or $03, $03, $04 ; $03 = 0xdeadbeef
68cwrite $02, [$00 + @REG_WRITE_ADDR], 0x0
69cwrite $03, [$00 + @REG_WRITE], 0x0
70waitin ; this waitin / mov $01, $data pair closes every handler in this file
71mov $01, $data
72
73CP_ME_INIT:
74; test label-as-immediate feature: a #label operand used where an immediate is expected
75mov $02, #loc02 ; should be 0x0002
76waitin ; this waitin / mov $01, $data pair closes every handler in this file
77mov $01, $data
78
79CP_MEM_WRITE:
80; test $addr + (rep) + (xmovN) with ALU
81mov $addr, 0xa0 << 24 ; shifted-immediate form written to $addr
82mov $02, 4
83(xmov1)add $data, $02, $data ; (xmov1) modifier on an ALU op that both reads and writes $data
84mov $addr, 0xa204 << 16 ; second $addr value, shifted by 16 this time
85(rep)(xmov3)mov $data, $data ; (rep) and (xmov3) combined on a single instruction
86waitin ; this waitin / mov $01, $data pair closes every handler in this file
87mov $01, $data
88
89CP_SCRATCH_WRITE:
90; test (rep) + flags + non-zero offset with cwrite
91; TODO: 0x4 flag is actually pre-increment addressing, handle it as such
92mov $02, 0xff ; base register for the cwrite below
93(rep)cwrite $data, [$02 + 0x001], 0x4 ; covers all three cases: (rep), non-zero offset 0x001, and flags value 0x4
94waitin ; this waitin / mov $01, $data pair closes every handler in this file
95mov $01, $data
96
97CP_SET_SECURE_MODE:
98; test setsecure (takes a register and a label operand)
99mov $02, $data
100setsecure $02, #setsecure_success ; label operand names the success path below
101err: ; also used as the operand of preemptleave in UNKN15
102jump #err ; jumps to itself, so execution that reaches err spins here
103nop ; presumably fills the jump's delay slot
104setsecure_success:
105waitin ; this waitin / mov $01, $data pair closes every handler in this file
106mov $01, $data
107
108euclid:
109; Euclid's algorithm in afuc: https://en.wikipedia.org/wiki/Euclidean_algorithm
110; Since afuc doesn't do modulo, we implement the subtraction-based version.
111;
112; Demonstrates/tests comparisons and conditional branches. This also
113; demonstrates the common trick of branching in a delay slot. Note that if a
114; branch is taken and its delay slot includes another branch, the second
115; branch cannot also be taken, which is why the last branch in the sequence
116; cannot be unconditional.
117;
118; Inputs are in $02 and $03, and output is in $02.
119cmp $04, $02, $03 ; $04 = comparison result for $02 vs $03; individual bits are tested below
120breq $04, b0, #euclid_exit ; leave the loop; presumably the $02 == $03 case (bit encoding is not shown in this file)
121brne $04, b1, #euclid_gt ; sits in the delay slot of the branch above; the target name suggests the $02 > $03 case
122breq $04, b2, #euclid ; delay slot of the brne; kept conditional for the reason given in the header comment
123sub $03, $03, $02 ; delay slot of the loop-back branch: $03 -= $02
124euclid_gt:
125jump #euclid ; loop back to the compare
126sub $02, $02, $03 ; delay slot of the jump: $02 -= $03
127euclid_exit:
128ret ; return to the caller (CP_REG_RMW reaches this routine via call #euclid)
129nop ; presumably fills the delay slot of ret
130
131CP_REG_RMW:
132; Test various ALU instructions, and read/write $regdata
133cwrite $data, [$00 + @REG_READ_ADDR], 0x0 ; a packet dword is written to @REG_READ_ADDR
134add $02, $regdata, 0x42 ; $regdata as a source operand with an immediate
135addhi $03, $00, $regdata ; $regdata as the second source operand
136sub $02, $02, $regdata
137call #euclid ; euclid takes $02/$03 as inputs and leaves its result in $02
138subhi $03, $03, $regdata ; follows the call; presumably executes in its delay slot - confirm
139and $02, $02, $regdata
140or $02, $02, 0x1 ; immediate-operand form
141xor $02, $02, 0x1 ; immediate-operand form
142not $02, $02 ; single-source form
143shl $02, $02, $regdata
144ushr $02, $02, $regdata
145ishr $02, $02, $regdata
146rot $02, $02, $regdata
147min $02, $02, $regdata
148max $02, $02, $regdata
149mul8 $02, $02, $regdata
150msb $02, $02 ; single-source form
151mov $usraddr, $data ; next packet dword goes to $usraddr
152mov $data, $02 ; the accumulated value in $02 is written to $data
153waitin ; this waitin / mov $01, $data pair closes every handler in this file
154mov $01, $data
155
156CP_MEMCPY:
157; implement CP_MEMCPY using load/store instructions
158mov $02, $data ; base register for the load below
159mov $03, $data ; written to @LOAD_STORE_HI before the load
160mov $04, $data ; base register for the store below
161mov $05, $data ; written to @LOAD_STORE_HI before the store
162mov $06, $data ; value tested against 0 for loop exit (presumably the dword count)
163cpy_header:
164breq $06, 0, #cpy_exit ; NOTE(review): nothing between cpy_header and cpy_exit writes $06 - expected for a mock, but confirm
165cwrite $03, [$00 + @LOAD_STORE_HI], 0x0 ; follows the breq; presumably executes in its delay slot
166load $07, [$02 + 0x004], 0x4 ; 0x4 flag: see the TODO under CP_SCRATCH_WRITE about pre-increment addressing
167cwrite $05, [$00 + @LOAD_STORE_HI], 0x0 ; switch @LOAD_STORE_HI to the store side's value
168jump #cpy_header
169store $07, [$04 + 0x004], 0x4 ; follows the jump; presumably executes in its delay slot
170cpy_exit:
171waitin ; this waitin / mov $01, $data pair closes every handler in this file
172mov $01, $data
173
174CP_MEM_TO_MEM:
175; implement a memory copy (as CP_MEMCPY does) using the mem read control regs instead of load
176; tests @FOO+0x1 for 64-bit control regs, and reading/writing $rem
177cwrite $data, [$00 + @MEM_READ_ADDR], 0x0 ; first packet dword goes to @MEM_READ_ADDR
178cwrite $data, [$00 + @MEM_READ_ADDR+1], 0x0 ; next dword goes to the +1 slot; offset written as +1 here, +0x1 elsewhere in this file
179mov $02, $data ; base register for the store below
180cwrite $data, [$00 + @LOAD_STORE_HI], 0x0
181mov $rem, $data ; writing $rem
182cwrite $rem, [$00 + @MEM_READ_DWORDS], 0x0 ; reading $rem back as a source operand
183(rep)store $memdata, [$02 + 0x004], 0x4 ; stores the read data; (rep) presumably repeats per $rem - confirm
184waitin ; this waitin / mov $01, $data pair closes every handler in this file
185mov $01, $data
186
187UNKN15:
188; test preemptleave + iret + conditional branch w/ immed
189cread $02, [$00 + 0x101], 0x0 ; control register given as a raw number (0x101) rather than an @NAME
190brne $02, 0x0001, #exit_iret ; conditional branch comparing against an immediate
191nop ; presumably fills the branch's delay slot
192preemptleave #err ; err is the self-jump label defined under CP_SET_SECURE_MODE
193nop ; three nops follow preemptleave; the reason is not stated in this file
194nop
195nop
196waitin ; this waitin / mov $01, $data pair closes every handler in this file
197mov $01, $data
198exit_iret:
199iret
200nop ; presumably fills the delay slot of iret
201
202UNKN0: ; start of a fall-through run of labels: no instructions separate them, so all share the two-instruction handler at the end
203UNKN1:
204UNKN2:
205UNKN3:
206PKT4:
207UNKN5:
208UNKN6:
209UNKN7:
210UNKN8:
211UNKN9:
212UNKN10:
213UNKN11:
214UNKN12:
215UNKN13:
216UNKN14:
217CP_NOP:
218CP_RECORD_PFP_TIMESTAMP:
219CP_WAIT_MEM_WRITES:
220CP_WAIT_FOR_ME:
221CP_WAIT_MEM_GTE:
222UNKN21:
223UNKN22:
224UNKN23:
225UNKN24:
226CP_DRAW_PRED_ENABLE_GLOBAL:
227CP_DRAW_PRED_ENABLE_LOCAL:
228UNKN27:
229CP_PREEMPT_ENABLE:
230CP_SKIP_IB2_ENABLE_GLOBAL:
231CP_PREEMPT_TOKEN:
232UNKN31:
233UNKN32:
234CP_DRAW_INDX:
235CP_SKIP_IB2_ENABLE_LOCAL:
236CP_DRAW_AUTO:
237CP_SET_STATE:
238CP_WAIT_FOR_IDLE:
239CP_IM_LOAD:
240CP_DRAW_INDIRECT:
241CP_DRAW_INDX_INDIRECT:
242CP_DRAW_INDIRECT_MULTI:
243CP_IM_LOAD_IMMEDIATE:
244CP_BLIT:
245CP_SET_CONSTANT:
246CP_SET_BIN_DATA5_OFFSET:
247CP_SET_BIN_DATA5:
248UNKN48:
249CP_RUN_OPENCL:
250CP_LOAD_STATE6_GEOM:
251CP_EXEC_CS:
252CP_LOAD_STATE6_FRAG:
253CP_SET_SUBDRAW_SIZE:
254CP_LOAD_STATE6:
255CP_INDIRECT_BUFFER_PFD:
256CP_DRAW_INDX_OFFSET:
257CP_REG_TEST:
258CP_COND_INDIRECT_BUFFER_PFE:
259CP_INVALIDATE_STATE:
260CP_WAIT_REG_MEM:
261CP_REG_TO_MEM:
262CP_INDIRECT_BUFFER:
263CP_INTERRUPT:
264CP_EXEC_CS_INDIRECT:
265CP_MEM_TO_REG:
266CP_SET_DRAW_STATE:
267CP_COND_EXEC:
268CP_COND_WRITE5:
269CP_EVENT_WRITE:
270CP_COND_REG_EXEC:
271UNKN73:
272CP_REG_TO_SCRATCH:
273CP_SET_DRAW_INIT_FLAGS:
274CP_SCRATCH_TO_REG:
275CP_DRAW_PRED_SET:
276CP_MEM_WRITE_CNTR:
277CP_START_BIN:
278CP_END_BIN:
279CP_WAIT_REG_EQ:
280CP_SMMU_TABLE_UPDATE:
281UNKN84:
282CP_SET_CTXSWITCH_IB:
283CP_SET_PSEUDO_REG:
284CP_INDIRECT_BUFFER_CHAIN:
285CP_EVENT_WRITE_SHD:
286CP_EVENT_WRITE_CFL:
287UNKN90:
288CP_EVENT_WRITE_ZPD:
289CP_CONTEXT_REG_BUNCH:
290CP_WAIT_IB_PFD_COMPLETE:
291CP_CONTEXT_UPDATE:
292CP_SET_PROTECTED_MODE:
293UNKN96:
294UNKN97:
295UNKN98:
296CP_SET_MODE:
297CP_SET_VISIBILITY_OVERRIDE:
298CP_SET_MARKER:
299UNKN103:
300UNKN104:
301UNKN105:
302UNKN106:
303UNKN107:
304UNKN108:
305CP_REG_WRITE:
306UNKN110:
307CP_BOOTSTRAP_UCODE:
308CP_WAIT_TWO_REGS:
309CP_TEST_TWO_MEMS:
310CP_REG_TO_MEM_OFFSET_REG:
311CP_REG_TO_MEM_OFFSET_MEM:
312UNKN118:
313UNKN119:
314CP_REG_WR_NO_CTXT:
315UNKN121:
316UNKN122:
317UNKN123:
318UNKN124:
319UNKN125:
320UNKN126:
321UNKN127:
322        waitin ; shared default handler: the waitin / mov $01, $data pair that closes every other handler in this file
323        mov $01, $data
324