/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "util_math.h"
static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test",  amdgpu_query_info_test },
	{ "Userptr Test",  amdgpu_userptr_test },
	{ "bo eviction Test",  amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test",  amdgpu_semaphore_test },
	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
	{ "Draw Test",  amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (8 * 1024)
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
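/* Worked example: SDMA_PACKET() packs the extra bits, sub-opcode and opcode
 * into one header dword, so
 * SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0) == 0x00000001
 * and SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0) == 0x00000002.
 */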

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
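/* Worked decode example: for the compute NOP header 0xffff1000 defined above,
 * CP_PACKET_GET_TYPE() yields 3 (a type-3 packet), CP_PACKET3_GET_OPCODE()
 * yields 0x10 (PACKET3_NOP) and CP_PACKET_GET_COUNT() yields 0x3fff.
 */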
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))

/* Packet 3 types */
#define	PACKET3_NOP					0x10
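/* Note that GFX_COMPUTE_NOP above is just PACKET3(PACKET3_NOP, 0x3fff), i.e.
 * a type-3 NOP whose count field pads the rest of the IB:
 * (3 << 30) | (0x10 << 8) | (0x3fff << 16) == 0xffff1000.
 */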

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */
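/* Example from the write-linear test below: a WRITE_DATA packet that writes
 * n dwords to memory (async - direct) with write confirmation is built as
 *   PACKET3(PACKET3_WRITE_DATA, 2 + n)
 *   WRITE_DATA_DST_SEL(5) | WR_CONFIRM
 *   dst_addr_lo, dst_addr_hi, followed by the n payload dwords.
 */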

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
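/* Example from the const-fill test below: a DMA_DATA packet that broadcasts
 * an immediate dword (SRC_SEL(2) == DATA) to a destination address is
 *   PACKET3(PACKET3_DMA_DATA, 5)
 *   PACKET3_DMA_DATA_ENGINE(0) | PACKET3_DMA_DATA_DST_SEL(0) |
 *   PACKET3_DMA_DATA_SRC_SEL(2) | PACKET3_DMA_DATA_CP_SYNC
 *   data, 0, dst_addr_lo, dst_addr_hi, byte_count.
 */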

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
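/* Worked example: the semaphore test below encodes its SI SDMA NOP as
 * SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0), i.e. (0xf << 28) == 0xf0000000.
 */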
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)


#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

#define	PACKET3_DISPATCH_DIRECT				0x15
#define PACKET3_EVENT_WRITE				0x46
#define PACKET3_ACQUIRE_MEM				0x58
#define PACKET3_SET_CONTEXT_REG				0x69
#define PACKET3_SET_UCONFIG_REG				0x79
#define PACKET3_DRAW_INDEX_AUTO				0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
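/* These mmCOMPUTE_* values are dword register offsets. When programmed via a
 * PKT3_SET_SH_REG packet, the first payload dword is expected to be the
 * offset relative to PACKET3_SET_SH_REG_START, e.g.
 * mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START == 0x20c.
 */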

#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
		      ((num & 0x0000ff00) << 8) | \
		      ((num & 0x00ff0000) >> 8) | \
		      ((num & 0x000000ff) << 24))
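/* SWAP_32() reverses the byte order of a dword, e.g.
 * SWAP_32(0x12345678) == 0x78563412; it stores the shader binary below in
 * the byte order the hardware expects.
 */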

/* Shader code:
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024

enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY
};

static const uint32_t bufferclear_cs_shader_gfx9[] = {
    0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
    0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
    0xBF810000
};

static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

static const uint32_t buffercopy_cs_shader_gfx9[] = {
    0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
    0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
};

static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

enum ps_type {
	PS_CONST,
	PS_TEX
};

static const uint32_t ps_const_shader_gfx9[] = {
    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
    0xC4001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
    }
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
    0x00000004
};

static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t ps_tex_shader_gfx9[] = {
    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
    0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
    0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
    }
};

static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

static const uint32_t ps_tex_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
};

static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
    0xC400020F, 0x05060403, 0xBF810000
};

static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x1a,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
			unsigned alignment, unsigned heap, uint64_t alloc_flags,
			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
			uint64_t *mc_address,
			amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				   AMDGPU_VM_PAGE_READABLE |
				   AMDGPU_VM_PAGE_WRITEABLE |
				   AMDGPU_VM_PAGE_EXECUTABLE |
				   mapping_flags,
				   AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

 error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

 error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

 error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
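/* A minimal usage sketch (not part of the test suite): allocate a 4 KiB GTT
 * buffer with default mapping flags, touch it from the CPU and release it
 * again. Error handling is elided for brevity.
 *
 *	amdgpu_bo_handle bo;
 *	amdgpu_va_handle va;
 *	void *cpu;
 *	uint64_t mc;
 *
 *	amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
 *				    AMDGPU_GEM_DOMAIN_GTT, 0, 0,
 *				    &bo, &cpu, &mc, &va);
 *	memset(cpu, 0, 4096);
 *	amdgpu_bo_unmap_and_free(bo, va, mc, 4096);
 */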

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				   &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
				"Hint: Try running this test program as root.",
				strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run 2x2 loops to cover all mapping-flag combinations */
	while (loop1 < 2) {
		while (loop2 < 2) {
			/* allocate UC bo1 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill PM4: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify the SDMA copy result matches the expected pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release:
 * pm4_src, resources, ib_info, and ibs_request.
 * This helper submits the command stream described in ibs_request and waits
 * for the IB to complete.
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the caller's PM4 packet into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt + 1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC BO for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear BO */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fill PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				pm4[i++] = 0xffffffff & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the write-linear result matches the expected pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC BO for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear BO */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fill PM4: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the const-fill result matches the expected pattern */
			i = 0;
			while (i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
1500 
amdgpu_command_submission_copy_linear_helper(unsigned ip_type)1501 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1502 {
1503 	const int sdma_write_length = 1024;
1504 	const int pm4_dw = 256;
1505 	amdgpu_context_handle context_handle;
1506 	amdgpu_bo_handle bo1, bo2;
1507 	amdgpu_bo_handle *resources;
1508 	uint32_t *pm4;
1509 	struct amdgpu_cs_ib_info *ib_info;
1510 	struct amdgpu_cs_request *ibs_request;
1511 	uint64_t bo1_mc, bo2_mc;
1512 	volatile unsigned char *bo1_cpu, *bo2_cpu;
1513 	int i, j, r, loop1, loop2, ring_id;
1514 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1515 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1516 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1517 
1518 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1519 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1520 
1521 	ib_info = calloc(1, sizeof(*ib_info));
1522 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1523 
1524 	ibs_request = calloc(1, sizeof(*ibs_request));
1525 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1526 
1527 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1528 	CU_ASSERT_EQUAL(r, 0);
1529 
1530 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1531 	CU_ASSERT_EQUAL(r, 0);
1532 
1533 	/* prepare resource */
1534 	resources = calloc(2, sizeof(amdgpu_bo_handle));
1535 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1536 
1537 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1538 		loop1 = loop2 = 0;
1539 		/* run 9 circle to test all mapping combination */
1540 		while(loop1 < 2) {
1541 			while(loop2 < 2) {
1542 				/* allocate UC bo1for sDMA use */
1543 				r = amdgpu_bo_alloc_and_map(device_handle,
1544 							    sdma_write_length, 4096,
1545 							    AMDGPU_GEM_DOMAIN_GTT,
1546 							    gtt_flags[loop1], &bo1,
1547 							    (void**)&bo1_cpu, &bo1_mc,
1548 							    &bo1_va_handle);
1549 				CU_ASSERT_EQUAL(r, 0);
1550 
1551 				/* set bo1 */
1552 				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1553 
1554 				/* allocate UC bo2 for sDMA use */
1555 				r = amdgpu_bo_alloc_and_map(device_handle,
1556 							    sdma_write_length, 4096,
1557 							    AMDGPU_GEM_DOMAIN_GTT,
1558 							    gtt_flags[loop2], &bo2,
1559 							    (void**)&bo2_cpu, &bo2_mc,
1560 							    &bo2_va_handle);
1561 				CU_ASSERT_EQUAL(r, 0);
1562 
1563 				/* clear bo2 */
1564 				memset((void*)bo2_cpu, 0, sdma_write_length);
1565 
1566 				resources[0] = bo1;
1567 				resources[1] = bo2;
1568 
1569 				/* fill PM4: test DMA copy linear */
1570 				i = j = 0;
1571 				if (ip_type == AMDGPU_HW_IP_DMA) {
1572 					if (family_id == AMDGPU_FAMILY_SI) {
1573 						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1574 									  0, 0, 0,
1575 									  sdma_write_length);
1576 						pm4[i++] = 0xffffffff & bo2_mc;
1577 						pm4[i++] = 0xffffffff & bo1_mc;
1578 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1579 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1580 					} else {
1581 						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1582 								       SDMA_COPY_SUB_OPCODE_LINEAR,
1583 								       0);
1584 						if (family_id >= AMDGPU_FAMILY_AI)
1585 							pm4[i++] = sdma_write_length - 1;
1586 						else
1587 							pm4[i++] = sdma_write_length;
1588 						pm4[i++] = 0;
1589 						pm4[i++] = 0xffffffff & bo1_mc;
1590 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1591 						pm4[i++] = 0xffffffff & bo2_mc;
1592 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1593 					}
1594 				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1595 					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1596 					if (family_id == AMDGPU_FAMILY_SI) {
1597 						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1598 						pm4[i++] = 0xfffffffc & bo1_mc;
1599 						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1600 							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1601 							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1602 							   PACKET3_DMA_DATA_SI_CP_SYNC |
1603 							   (0xffff00000000 & bo1_mc) >> 32;
1604 						pm4[i++] = 0xfffffffc & bo2_mc;
1605 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1606 						pm4[i++] = sdma_write_length;
1607 					} else {
1608 						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1609 						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1610 							   PACKET3_DMA_DATA_DST_SEL(0) |
1611 							   PACKET3_DMA_DATA_SRC_SEL(0) |
1612 							   PACKET3_DMA_DATA_CP_SYNC;
1613 						pm4[i++] = 0xfffffffc & bo1_mc;
1614 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1615 						pm4[i++] = 0xfffffffc & bo2_mc;
1616 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1617 						pm4[i++] = sdma_write_length;
1618 					}
1619 				}
1620 
1621 				amdgpu_test_exec_cs_helper(context_handle,
1622 							   ip_type, ring_id,
1623 							   i, pm4,
1624 							   2, resources,
1625 							   ib_info, ibs_request);
1626 
1627 				/* verify the SDMA test result matches the expected pattern */
1628 				i = 0;
1629 				while(i < sdma_write_length) {
1630 					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1631 				}
1632 				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1633 							     sdma_write_length);
1634 				CU_ASSERT_EQUAL(r, 0);
1635 				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1636 							     sdma_write_length);
1637 				CU_ASSERT_EQUAL(r, 0);
1639 			}
1641 		}
1642 	}
1643 	/* clean resources */
1644 	free(resources);
1645 	free(ibs_request);
1646 	free(ib_info);
1647 	free(pm4);
1648 
1649 	/* end of test */
1650 	r = amdgpu_cs_ctx_free(context_handle);
1651 	CU_ASSERT_EQUAL(r, 0);
1652 }
1653 
1654 static void amdgpu_command_submission_sdma_copy_linear(void)
1655 {
1656 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
1657 }
1658 
1659 static void amdgpu_command_submission_sdma(void)
1660 {
1661 	amdgpu_command_submission_sdma_write_linear();
1662 	amdgpu_command_submission_sdma_const_fill();
1663 	amdgpu_command_submission_sdma_copy_linear();
1664 }
1665 
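/* Builds a two-IB submission (a CE IB that bumps the CE/DE counters and a
 * DE IB that waits on the CE counter), submits it twice on the GFX ring,
 * and waits for both fences via amdgpu_cs_wait_fences() with the given
 * wait_all mode. */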
1666 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1667 {
1668 	amdgpu_context_handle context_handle;
1669 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1670 	void *ib_result_cpu, *ib_result_ce_cpu;
1671 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1672 	struct amdgpu_cs_request ibs_request[2] = {0};
1673 	struct amdgpu_cs_ib_info ib_info[2];
1674 	struct amdgpu_cs_fence fence_status[2] = {0};
1675 	uint32_t *ptr;
1676 	uint32_t expired;
1677 	amdgpu_bo_list_handle bo_list;
1678 	amdgpu_va_handle va_handle, va_handle_ce;
1679 	int r;
1680 	int i = 0, ib_cs_num = 2;
1681 
1682 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1683 	CU_ASSERT_EQUAL(r, 0);
1684 
1685 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1686 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1687 				    &ib_result_handle, &ib_result_cpu,
1688 				    &ib_result_mc_address, &va_handle);
1689 	CU_ASSERT_EQUAL(r, 0);
1690 
1691 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1692 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1693 				    &ib_result_ce_handle, &ib_result_ce_cpu,
1694 				    &ib_result_ce_mc_address, &va_handle_ce);
1695 	CU_ASSERT_EQUAL(r, 0);
1696 
1697 	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1698 			       ib_result_ce_handle, &bo_list);
1699 	CU_ASSERT_EQUAL(r, 0);
1700 
1701 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1702 
1703 	/* IT_SET_CE_DE_COUNTERS */
1704 	ptr = ib_result_ce_cpu;
1705 	if (family_id != AMDGPU_FAMILY_SI) {
1706 		ptr[i++] = 0xc0008900;
1707 		ptr[i++] = 0;
1708 	}
1709 	ptr[i++] = 0xc0008400;
1710 	ptr[i++] = 1;
1711 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1712 	ib_info[0].size = i;
1713 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1714 
1715 	/* IT_WAIT_ON_CE_COUNTER */
1716 	ptr = ib_result_cpu;
1717 	ptr[0] = 0xc0008600;
1718 	ptr[1] = 0x00000001;
1719 	ib_info[1].ib_mc_address = ib_result_mc_address;
1720 	ib_info[1].size = 2;
1721 
1722 	for (i = 0; i < ib_cs_num; i++) {
1723 		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1724 		ibs_request[i].number_of_ibs = 2;
1725 		ibs_request[i].ibs = ib_info;
1726 		ibs_request[i].resources = bo_list;
1727 		ibs_request[i].fence_info.handle = NULL;
1728 	}
1729 
1730 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
1731 
1732 	CU_ASSERT_EQUAL(r, 0);
1733 
1734 	for (i = 0; i < ib_cs_num; i++) {
1735 		fence_status[i].context = context_handle;
1736 		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1737 		fence_status[i].fence = ibs_request[i].seq_no;
1738 	}
1739 
1740 	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1741 				AMDGPU_TIMEOUT_INFINITE,
1742 				&expired, NULL);
1743 	CU_ASSERT_EQUAL(r, 0);
1744 
1745 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1746 				     ib_result_mc_address, 4096);
1747 	CU_ASSERT_EQUAL(r, 0);
1748 
1749 	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1750 				     ib_result_ce_mc_address, 4096);
1751 	CU_ASSERT_EQUAL(r, 0);
1752 
1753 	r = amdgpu_bo_list_destroy(bo_list);
1754 	CU_ASSERT_EQUAL(r, 0);
1755 
1756 	r = amdgpu_cs_ctx_free(context_handle);
1757 	CU_ASSERT_EQUAL(r, 0);
1758 }
1759 
1760 static void amdgpu_command_submission_multi_fence(void)
1761 {
1762 	amdgpu_command_submission_multi_fence_wait_all(true);
1763 	amdgpu_command_submission_multi_fence_wait_all(false);
1764 }
1765 
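/* Wraps page-aligned user memory in a BO with
 * amdgpu_create_bo_from_user_mem(), maps it into the GPU VA space, and has
 * SDMA write 0xdeadbeaf into it.  A child fork()ed before submission
 * scribbles over its copy of the PM4 buffer, checking that copy-on-write
 * in the child cannot corrupt the parent's submission. */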
1766 static void amdgpu_userptr_test(void)
1767 {
1768 	int i, r, j;
1769 	uint32_t *pm4 = NULL;
1770 	uint64_t bo_mc;
1771 	void *ptr = NULL;
1772 	int pm4_dw = 256;
1773 	int sdma_write_length = 4;
1774 	amdgpu_bo_handle handle;
1775 	amdgpu_context_handle context_handle;
1776 	struct amdgpu_cs_ib_info *ib_info;
1777 	struct amdgpu_cs_request *ibs_request;
1778 	amdgpu_bo_handle buf_handle;
1779 	amdgpu_va_handle va_handle;
1780 
1781 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1782 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1783 
1784 	ib_info = calloc(1, sizeof(*ib_info));
1785 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1786 
1787 	ibs_request = calloc(1, sizeof(*ibs_request));
1788 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1789 
1790 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1791 	CU_ASSERT_EQUAL(r, 0);
1792 
1793 	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
1794 	CU_ASSERT_EQUAL(r, 0);
1795 	memset(ptr, 0, BUFFER_SIZE);
1796 
1797 	r = amdgpu_create_bo_from_user_mem(device_handle,
1798 					   ptr, BUFFER_SIZE, &buf_handle);
1799 	CU_ASSERT_EQUAL(r, 0);
1800 
1801 	r = amdgpu_va_range_alloc(device_handle,
1802 				  amdgpu_gpu_va_range_general,
1803 				  BUFFER_SIZE, 1, 0, &bo_mc,
1804 				  &va_handle, 0);
1805 	CU_ASSERT_EQUAL(r, 0);
1806 
1807 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
1808 	CU_ASSERT_EQUAL(r, 0);
1809 
1810 	handle = buf_handle;
1811 
1812 	j = i = 0;
1813 
1814 	if (family_id == AMDGPU_FAMILY_SI)
1815 		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1816 				sdma_write_length);
1817 	else
1818 		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1819 				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
1820 	pm4[i++] = 0xffffffff & bo_mc;
1821 	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1822 	if (family_id >= AMDGPU_FAMILY_AI)
1823 		pm4[i++] = sdma_write_length - 1;
1824 	else if (family_id != AMDGPU_FAMILY_SI)
1825 		pm4[i++] = sdma_write_length;
1826 
1827 	while (j++ < sdma_write_length)
1828 		pm4[i++] = 0xdeadbeaf;
1829 
1830 	if (!fork()) {
1831 		pm4[0] = 0x0;
1832 		exit(0);
1833 	}
1834 
1835 	amdgpu_test_exec_cs_helper(context_handle,
1836 				   AMDGPU_HW_IP_DMA, 0,
1837 				   i, pm4,
1838 				   1, &handle,
1839 				   ib_info, ibs_request);
1840 	i = 0;
1841 	while (i < sdma_write_length) {
1842 		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
1843 	}
1844 	free(ibs_request);
1845 	free(ib_info);
1846 	free(pm4);
1847 
1848 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
1849 	CU_ASSERT_EQUAL(r, 0);
1850 	r = amdgpu_va_range_free(va_handle);
1851 	CU_ASSERT_EQUAL(r, 0);
1852 	r = amdgpu_bo_free(buf_handle);
1853 	CU_ASSERT_EQUAL(r, 0);
1854 	free(ptr);
1855 
1856 	r = amdgpu_cs_ctx_free(context_handle);
1857 	CU_ASSERT_EQUAL(r, 0);
1858 
1859 	wait(NULL);
1860 }
1861 
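/* Submits a compute shader dispatch on context 1, then a dependent
 * WRITE_DATA of 99 on context 0 that lists the first submission's fence as
 * a dependency; the final value at DATA_OFFSET proves the second IB ran
 * only after the shader finished. */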
1862 static void amdgpu_sync_dependency_test(void)
1863 {
1864 	amdgpu_context_handle context_handle[2];
1865 	amdgpu_bo_handle ib_result_handle;
1866 	void *ib_result_cpu;
1867 	uint64_t ib_result_mc_address;
1868 	struct amdgpu_cs_request ibs_request;
1869 	struct amdgpu_cs_ib_info ib_info;
1870 	struct amdgpu_cs_fence fence_status;
1871 	uint32_t expired;
1872 	int i, j, r;
1873 	amdgpu_bo_list_handle bo_list;
1874 	amdgpu_va_handle va_handle;
1875 	static uint32_t *ptr;
1876 	uint64_t seq_no;
1877 
1878 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
1879 	CU_ASSERT_EQUAL(r, 0);
1880 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
1881 	CU_ASSERT_EQUAL(r, 0);
1882 
1883 	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
1884 			AMDGPU_GEM_DOMAIN_GTT, 0,
1885 						    &ib_result_handle, &ib_result_cpu,
1886 						    &ib_result_mc_address, &va_handle);
1887 	CU_ASSERT_EQUAL(r, 0);
1888 
1889 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1890 			       &bo_list);
1891 	CU_ASSERT_EQUAL(r, 0);
1892 
1893 	ptr = ib_result_cpu;
1894 	i = 0;
1895 
1896 	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
1897 
1898 	/* Dispatch minimal init config and verify it's executed */
1899 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
1900 	ptr[i++] = 0x80000000;
1901 	ptr[i++] = 0x80000000;
1902 
1903 	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
1904 	ptr[i++] = 0x80000000;
1905 
1906 
1907 	/* Program compute regs */
1908 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1909 	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1910 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
1911 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
1912 
1913 
1914 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1915 	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
1916 	/*
1917 	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
1918 	                                      SGPRS = 1
1919 	                                      PRIORITY = 0
1920 	                                      FLOAT_MODE = 192 (0xc0)
1921 	                                      PRIV = 0
1922 	                                      DX10_CLAMP = 1
1923 	                                      DEBUG_MODE = 0
1924 	                                      IEEE_MODE = 0
1925 	                                      BULKY = 0
1926 	                                      CDBG_USER = 0
1927 	 *
1928 	 */
1929 	ptr[i++] = 0x002c0040;
1930 
1931 
1932 	/*
1933 	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
1934 	                                      USER_SGPR = 8
1935 	                                      TRAP_PRESENT = 0
1936 	                                      TGID_X_EN = 0
1937 	                                      TGID_Y_EN = 0
1938 	                                      TGID_Z_EN = 0
1939 	                                      TG_SIZE_EN = 0
1940 	                                      TIDIG_COMP_CNT = 0
1941 	                                      EXCP_EN_MSB = 0
1942 	                                      LDS_SIZE = 0
1943 	                                      EXCP_EN = 0
1944 	 *
1945 	 */
1946 	ptr[i++] = 0x00000010;
1947 
1948 
1949 /*
1950  * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
1951                                          WAVESIZE = 0
1952  *
1953  */
1954 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
1955 	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
1956 	ptr[i++] = 0x00000100;
1957 
1958 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1959 	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
1960 	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
1961 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
1962 
1963 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
1964 	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
1965 	ptr[i++] = 0;
1966 
1967 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
1968 	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
1969 	ptr[i++] = 1;
1970 	ptr[i++] = 1;
1971 	ptr[i++] = 1;
1972 
1973 
1974 	/* Dispatch */
1975 	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1976 	ptr[i++] = 1;
1977 	ptr[i++] = 1;
1978 	ptr[i++] = 1;
1979 	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
1980 
1981 
1982 	while (i & 7)
1983 		ptr[i++] =  0xffff1000; /* type3 nop packet */
1984 
1985 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1986 	ib_info.ib_mc_address = ib_result_mc_address;
1987 	ib_info.size = i;
1988 
1989 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1990 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
1991 	ibs_request.ring = 0;
1992 	ibs_request.number_of_ibs = 1;
1993 	ibs_request.ibs = &ib_info;
1994 	ibs_request.resources = bo_list;
1995 	ibs_request.fence_info.handle = NULL;
1996 
1997 	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
1998 	CU_ASSERT_EQUAL(r, 0);
1999 	seq_no = ibs_request.seq_no;
2000 
2001 
2002 
2003 	/* Prepare second command with dependency on the first */
2004 	j = i;
2005 	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2006 	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2007 	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2008 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2009 	ptr[i++] = 99;
2010 
2011 	while (i & 7)
2012 		ptr[i++] =  0xffff1000; /* type3 nop packet */
2013 
2014 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2015 	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2016 	ib_info.size = i - j;
2017 
2018 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2019 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2020 	ibs_request.ring = 0;
2021 	ibs_request.number_of_ibs = 1;
2022 	ibs_request.ibs = &ib_info;
2023 	ibs_request.resources = bo_list;
2024 	ibs_request.fence_info.handle = NULL;
2025 
2026 	ibs_request.number_of_dependencies = 1;
2027 
2028 	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2029 	ibs_request.dependencies[0].context = context_handle[1];
2030 	ibs_request.dependencies[0].ip_instance = 0;
2031 	ibs_request.dependencies[0].ring = 0;
2032 	ibs_request.dependencies[0].fence = seq_no;
2033 
2034 
2035 	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
2036 	CU_ASSERT_EQUAL(r, 0);
2037 
2038 
2039 	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2040 	fence_status.context = context_handle[0];
2041 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2042 	fence_status.ip_instance = 0;
2043 	fence_status.ring = 0;
2044 	fence_status.fence = ibs_request.seq_no;
2045 
2046 	r = amdgpu_cs_query_fence_status(&fence_status,
2047 		       AMDGPU_TIMEOUT_INFINITE, 0, &expired);
2048 	CU_ASSERT_EQUAL(r, 0);
2049 
2050 	/* Expect the second command to wait for shader to complete */
2051 	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2052 
2053 	r = amdgpu_bo_list_destroy(bo_list);
2054 	CU_ASSERT_EQUAL(r, 0);
2055 
2056 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2057 				     ib_result_mc_address, 4096);
2058 	CU_ASSERT_EQUAL(r, 0);
2059 
2060 	r = amdgpu_cs_ctx_free(context_handle[0]);
2061 	CU_ASSERT_EQUAL(r, 0);
2062 	r = amdgpu_cs_ctx_free(context_handle[1]);
2063 	CU_ASSERT_EQUAL(r, 0);
2064 
2065 	free(ibs_request.dependencies);
2066 }
2067 
2068 static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2069 					   int cs_type)
2070 {
2071 	uint32_t shader_size;
2072 	const uint32_t *shader;
2073 
2074 	switch (cs_type) {
2075 		case CS_BUFFERCLEAR:
2076 			shader = bufferclear_cs_shader_gfx9;
2077 			shader_size = sizeof(bufferclear_cs_shader_gfx9);
2078 			break;
2079 		case CS_BUFFERCOPY:
2080 			shader = buffercopy_cs_shader_gfx9;
2081 			shader_size = sizeof(buffercopy_cs_shader_gfx9);
2082 			break;
2083 		default:
2084 			return -1;
2086 	}
2087 
2088 	memcpy(ptr, shader, shader_size);
2089 	return 0;
2090 }
2091 
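/* Emits the common dispatch preamble: CONTEXT_CONTROL on GFX rings, then
 * defaults for mmCOMPUTE_START_X/Y/Z and mmCOMPUTE_TMPRING_SIZE.  Returns
 * the number of dwords written. */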
2092 static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
2093 {
2094 	int i = 0;
2095 
2096 	/* Write context control and load shadowing register if necessary */
2097 	if (ip_type == AMDGPU_HW_IP_GFX) {
2098 		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2099 		ptr[i++] = 0x80000000;
2100 		ptr[i++] = 0x80000000;
2101 	}
2102 
2103 	/* Issue commands to set default compute state. */
2104 	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2105 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2106 	ptr[i++] = 0x204;
2107 	i += 3;
2108 
2109 	/* clear mmCOMPUTE_TMPRING_SIZE */
2110 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2111 	ptr[i++] = 0x218;
2112 	ptr[i++] = 0;
2113 
2114 	return i;
2115 }
2116 
2117 static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
2118 {
2119 	int i = 0;
2120 
2121 	/*  Issue commands to set cu mask used in current dispatch */
2122 	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2123 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2124 	ptr[i++] = 0x216;
2125 	ptr[i++] = 0xffffffff;
2126 	ptr[i++] = 0xffffffff;
2127 	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2128 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2129 	ptr[i++] = 0x219;
2130 	ptr[i++] = 0xffffffff;
2131 	ptr[i++] = 0xffffffff;
2132 
2133 	return i;
2134 }
2135 
2136 static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
2137 {
2138 	int i, j;
2139 
2140 	i = 0;
2141 
2142 	/* Writes shader state to HW */
2143 	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2144 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2145 	ptr[i++] = 0x20c;
2146 	ptr[i++] = (shader_addr >> 8);
2147 	ptr[i++] = (shader_addr >> 40);
2148 	/* write sh regs */
2149 	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2150 		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2151 		/* - Gfx9ShRegBase */
2152 		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2153 		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2154 	}
2155 
2156 	return i;
2157 }
2158 
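/* Memset-via-compute test: runs the gfx9 buffer-clear shader so that a
 * 16 KiB VRAM buffer is filled with 0x22222222 passed through SGPR
 * constants, then verifies every byte reads back as 0x22. */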
2159 static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
2160 					 uint32_t ip_type,
2161 					 uint32_t ring)
2162 {
2163 	amdgpu_context_handle context_handle;
2164 	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
2165 	volatile unsigned char *ptr_dst;
2166 	void *ptr_shader;
2167 	uint32_t *ptr_cmd;
2168 	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
2169 	amdgpu_va_handle va_dst, va_shader, va_cmd;
2170 	int i, r;
2171 	int bo_dst_size = 16384;
2172 	int bo_shader_size = 4096;
2173 	int bo_cmd_size = 4096;
2174 	struct amdgpu_cs_request ibs_request = {0};
2175 	struct amdgpu_cs_ib_info ib_info = {0};
2176 	amdgpu_bo_list_handle bo_list;
2177 	struct amdgpu_cs_fence fence_status = {0};
2178 	uint32_t expired;
2179 
2180 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2181 	CU_ASSERT_EQUAL(r, 0);
2182 
2183 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2184 					AMDGPU_GEM_DOMAIN_GTT, 0,
2185 					&bo_cmd, (void **)&ptr_cmd,
2186 					&mc_address_cmd, &va_cmd);
2187 	CU_ASSERT_EQUAL(r, 0);
2188 	memset(ptr_cmd, 0, bo_cmd_size);
2189 
2190 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2191 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2192 					&bo_shader, &ptr_shader,
2193 					&mc_address_shader, &va_shader);
2194 	CU_ASSERT_EQUAL(r, 0);
2195 	memset(ptr_shader, 0, bo_shader_size);
2196 
2197 	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
2198 	CU_ASSERT_EQUAL(r, 0);
2199 
2200 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2201 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2202 					&bo_dst, (void **)&ptr_dst,
2203 					&mc_address_dst, &va_dst);
2204 	CU_ASSERT_EQUAL(r, 0);
2205 
2206 	i = 0;
2207 	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2208 
2209 	/*  Issue commands to set cu mask used in current dispatch */
2210 	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2211 
2212 	/* Writes shader state to HW */
2213 	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2214 
2215 	/* Write constant data */
2216 	/* Writes the UAV constant data to the SGPRs. */
2217 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2218 	ptr_cmd[i++] = 0x240;
2219 	ptr_cmd[i++] = mc_address_dst;
2220 	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2221 	ptr_cmd[i++] = 0x400;
2222 	ptr_cmd[i++] = 0x74fac;
2223 
2224 	/* Sets a range of pixel shader constants */
2225 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2226 	ptr_cmd[i++] = 0x244;
2227 	ptr_cmd[i++] = 0x22222222;
2228 	ptr_cmd[i++] = 0x22222222;
2229 	ptr_cmd[i++] = 0x22222222;
2230 	ptr_cmd[i++] = 0x22222222;
2231 
2232 	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2233 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2234 	ptr_cmd[i++] = 0x215;
2235 	ptr_cmd[i++] = 0;
2236 
2237 	/* dispatch direct command */
2238 	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2239 	ptr_cmd[i++] = 0x10;
2240 	ptr_cmd[i++] = 1;
2241 	ptr_cmd[i++] = 1;
2242 	ptr_cmd[i++] = 1;
2243 
2244 	while (i & 7)
2245 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2246 
2247 	resources[0] = bo_dst;
2248 	resources[1] = bo_shader;
2249 	resources[2] = bo_cmd;
2250 	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
2251 	CU_ASSERT_EQUAL(r, 0);
2252 
2253 	ib_info.ib_mc_address = mc_address_cmd;
2254 	ib_info.size = i;
2255 	ibs_request.ip_type = ip_type;
2256 	ibs_request.ring = ring;
2257 	ibs_request.resources = bo_list;
2258 	ibs_request.number_of_ibs = 1;
2259 	ibs_request.ibs = &ib_info;
2260 	ibs_request.fence_info.handle = NULL;
2261 
2262 	/* submit CS */
2263 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2264 	CU_ASSERT_EQUAL(r, 0);
2265 
2266 	r = amdgpu_bo_list_destroy(bo_list);
2267 	CU_ASSERT_EQUAL(r, 0);
2268 
2269 	fence_status.ip_type = ip_type;
2270 	fence_status.ip_instance = 0;
2271 	fence_status.ring = ring;
2272 	fence_status.context = context_handle;
2273 	fence_status.fence = ibs_request.seq_no;
2274 
2275 	/* wait for IB accomplished */
2276 	r = amdgpu_cs_query_fence_status(&fence_status,
2277 					 AMDGPU_TIMEOUT_INFINITE,
2278 					 0, &expired);
2279 	CU_ASSERT_EQUAL(r, 0);
2280 	CU_ASSERT_EQUAL(expired, true);
2281 
2282 	/* verify the memset result matches the expected pattern */
2283 	i = 0;
2284 	while(i < bo_dst_size) {
2285 		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
2286 	}
2287 
2288 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2289 	CU_ASSERT_EQUAL(r, 0);
2290 
2291 	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2292 	CU_ASSERT_EQUAL(r, 0);
2293 
2294 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2295 	CU_ASSERT_EQUAL(r, 0);
2296 
2297 	r = amdgpu_cs_ctx_free(context_handle);
2298 	CU_ASSERT_EQUAL(r, 0);
2299 }
2300 
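/* Memcpy-via-compute test: runs the gfx9 buffer-copy shader with a
 * 0x55-filled VRAM buffer bound as the texture resource and a second
 * buffer as the UAV, then compares destination and source byte by byte. */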
2301 static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
2302 					uint32_t ip_type,
2303 					uint32_t ring)
2304 {
2305 	amdgpu_context_handle context_handle;
2306 	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2307 	volatile unsigned char *ptr_dst;
2308 	void *ptr_shader;
2309 	unsigned char *ptr_src;
2310 	uint32_t *ptr_cmd;
2311 	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2312 	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2313 	int i, r;
2314 	int bo_dst_size = 16384;
2315 	int bo_shader_size = 4096;
2316 	int bo_cmd_size = 4096;
2317 	struct amdgpu_cs_request ibs_request = {0};
2318 	struct amdgpu_cs_ib_info ib_info = {0};
2319 	uint32_t expired;
2320 	amdgpu_bo_list_handle bo_list;
2321 	struct amdgpu_cs_fence fence_status = {0};
2322 
2323 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2324 	CU_ASSERT_EQUAL(r, 0);
2325 
2326 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2327 				    AMDGPU_GEM_DOMAIN_GTT, 0,
2328 				    &bo_cmd, (void **)&ptr_cmd,
2329 				    &mc_address_cmd, &va_cmd);
2330 	CU_ASSERT_EQUAL(r, 0);
2331 	memset(ptr_cmd, 0, bo_cmd_size);
2332 
2333 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2334 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2335 					&bo_shader, &ptr_shader,
2336 					&mc_address_shader, &va_shader);
2337 	CU_ASSERT_EQUAL(r, 0);
2338 	memset(ptr_shader, 0, bo_shader_size);
2339 
2340 	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCOPY);
2341 	CU_ASSERT_EQUAL(r, 0);
2342 
2343 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2344 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2345 					&bo_src, (void **)&ptr_src,
2346 					&mc_address_src, &va_src);
2347 	CU_ASSERT_EQUAL(r, 0);
2348 
2349 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2350 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2351 					&bo_dst, (void **)&ptr_dst,
2352 					&mc_address_dst, &va_dst);
2353 	CU_ASSERT_EQUAL(r, 0);
2354 
2355 	memset(ptr_src, 0x55, bo_dst_size);
2356 
2357 	i = 0;
2358 	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2359 
2360 	/*  Issue commands to set cu mask used in current dispatch */
2361 	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2362 
2363 	/* Writes shader state to HW */
2364 	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2365 
2366 	/* Write constant data */
2367 	/* Writes the texture resource constants data to the SGPRs */
2368 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2369 	ptr_cmd[i++] = 0x240;
2370 	ptr_cmd[i++] = mc_address_src;
2371 	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2372 	ptr_cmd[i++] = 0x400;
2373 	ptr_cmd[i++] = 0x74fac;
2374 
2375 	/* Writes the UAV constant data to the SGPRs. */
2376 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2377 	ptr_cmd[i++] = 0x244;
2378 	ptr_cmd[i++] = mc_address_dst;
2379 	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2380 	ptr_cmd[i++] = 0x400;
2381 	ptr_cmd[i++] = 0x74fac;
2382 
2383 	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2384 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2385 	ptr_cmd[i++] = 0x215;
2386 	ptr_cmd[i++] = 0;
2387 
2388 	/* dispatch direct command */
2389 	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2390 	ptr_cmd[i++] = 0x10;
2391 	ptr_cmd[i++] = 1;
2392 	ptr_cmd[i++] = 1;
2393 	ptr_cmd[i++] = 1;
2394 
2395 	while (i & 7)
2396 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2397 
2398 	resources[0] = bo_shader;
2399 	resources[1] = bo_src;
2400 	resources[2] = bo_dst;
2401 	resources[3] = bo_cmd;
2402 	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2403 	CU_ASSERT_EQUAL(r, 0);
2404 
2405 	ib_info.ib_mc_address = mc_address_cmd;
2406 	ib_info.size = i;
2407 	ibs_request.ip_type = ip_type;
2408 	ibs_request.ring = ring;
2409 	ibs_request.resources = bo_list;
2410 	ibs_request.number_of_ibs = 1;
2411 	ibs_request.ibs = &ib_info;
2412 	ibs_request.fence_info.handle = NULL;
2413 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2414 	CU_ASSERT_EQUAL(r, 0);
2415 
2416 	fence_status.ip_type = ip_type;
2417 	fence_status.ip_instance = 0;
2418 	fence_status.ring = ring;
2419 	fence_status.context = context_handle;
2420 	fence_status.fence = ibs_request.seq_no;
2421 
2422 	/* wait for IB accomplished */
2423 	r = amdgpu_cs_query_fence_status(&fence_status,
2424 					 AMDGPU_TIMEOUT_INFINITE,
2425 					 0, &expired);
2426 	CU_ASSERT_EQUAL(r, 0);
2427 	CU_ASSERT_EQUAL(expired, true);
2428 
2429 	/* verify the memcpy result matches the source data */
2430 	i = 0;
2431 	while(i < bo_dst_size) {
2432 		CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
2433 		i++;
2434 	}
2435 
2436 	r = amdgpu_bo_list_destroy(bo_list);
2437 	CU_ASSERT_EQUAL(r, 0);
2438 
2439 	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2440 	CU_ASSERT_EQUAL(r, 0);
2441 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2442 	CU_ASSERT_EQUAL(r, 0);
2443 
2444 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2445 	CU_ASSERT_EQUAL(r, 0);
2446 
2447 	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2448 	CU_ASSERT_EQUAL(r, 0);
2449 
2450 	r = amdgpu_cs_ctx_free(context_handle);
2451 	CU_ASSERT_EQUAL(r, 0);
2452 }
2453 
2454 static void amdgpu_compute_dispatch_test(void)
2455 {
2456 	int r;
2457 	struct drm_amdgpu_info_hw_ip info;
2458 	uint32_t ring_id;
2459 
2460 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2461 	CU_ASSERT_EQUAL(r, 0);
2462 	if (!info.available_rings)
2463 		printf("SKIP ... as there's no compute ring\n");
2464 
2465 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2466 		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2467 		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2468 	}
2469 }
2470 
2471 static void amdgpu_gfx_dispatch_test(void)
2472 {
2473 	int r;
2474 	struct drm_amdgpu_info_hw_ip info;
2475 	uint32_t ring_id;
2476 
2477 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2478 	CU_ASSERT_EQUAL(r, 0);
2479 	if (!info.available_rings)
2480 		printf("SKIP ... as there's no graphics ring\n");
2481 
2482 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2483 		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2484 		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2485 	}
2486 }
2487 
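/* Copies the selected gfx9 pixel shader into the BO ten times at 256-byte
 * strides, then patches each copy with its per-instance patch code from
 * the patchinfo tables. */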
2488 static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
2489 {
2490 	int i;
2491 	uint32_t shader_offset = 256;
2492 	uint32_t mem_offset, patch_code_offset;
2493 	uint32_t shader_size, patchinfo_code_size;
2494 	const uint32_t *shader;
2495 	const uint32_t *patchinfo_code;
2496 	const uint32_t *patchcode_offset;
2497 
2498 	switch (ps_type) {
2499 		case PS_CONST:
2500 			shader = ps_const_shader_gfx9;
2501 			shader_size = sizeof(ps_const_shader_gfx9);
2502 			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
2503 			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
2504 			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
2505 			break;
2506 		case PS_TEX:
2507 			shader = ps_tex_shader_gfx9;
2508 			shader_size = sizeof(ps_tex_shader_gfx9);
2509 			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
2510 			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
2511 			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
2512 			break;
2513 		default:
2514 			return -1;
2516 	}
2517 
2518 	/* write main shader program */
2519 	for (i = 0 ; i < 10; i++) {
2520 		mem_offset = i * shader_offset;
2521 		memcpy(ptr + mem_offset, shader, shader_size);
2522 	}
2523 
2524 	/* overwrite patch codes */
2525 	for (i = 0 ; i < 10; i++) {
2526 		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
2527 		patch_code_offset = i * patchinfo_code_size;
2528 		memcpy(ptr + mem_offset,
2529 			patchinfo_code + patch_code_offset,
2530 			patchinfo_code_size * sizeof(uint32_t));
2531 	}
2532 
2533 	return 0;
2534 }
2535 
2536 /* load RectPosTexFast_VS */
2537 static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
2538 {
2539 	const uint32_t *shader;
2540 	uint32_t shader_size;
2541 
2542 	shader = vs_RectPosTexFast_shader_gfx9;
2543 	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
2544 
2545 	memcpy(ptr, shader, shader_size);
2546 
2547 	return 0;
2548 }
2549 
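/* Emits CONTEXT_CONTROL followed by the canned gfx9 preamble state
 * (preamblecache_gfx9) and returns the number of dwords written. */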
2550 static int amdgpu_draw_init(uint32_t *ptr)
2551 {
2552 	int i = 0;
2553 	const uint32_t *preamblecache_ptr;
2554 	uint32_t preamblecache_size;
2555 
2556 	/* Write context control and load shadowing register if necessary */
2557 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2558 	ptr[i++] = 0x80000000;
2559 	ptr[i++] = 0x80000000;
2560 
2561 	preamblecache_ptr = preamblecache_gfx9;
2562 	preamblecache_size = sizeof(preamblecache_gfx9);
2563 
2564 	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
2565 	return i + preamblecache_size/sizeof(uint32_t);
2566 }
2567 
2568 static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
2569 							 uint64_t dst_addr)
2570 {
2571 	int i = 0;
2572 
2573 	/* setup color buffer */
2574 	/* offset   reg
2575 	   0xA318   CB_COLOR0_BASE
2576 	   0xA319   CB_COLOR0_BASE_EXT
2577 	   0xA31A   CB_COLOR0_ATTRIB2
2578 	   0xA31B   CB_COLOR0_VIEW
2579 	   0xA31C   CB_COLOR0_INFO
2580 	   0xA31D   CB_COLOR0_ATTRIB
2581 	   0xA31E   CB_COLOR0_DCC_CONTROL
2582 	   0xA31F   CB_COLOR0_CMASK
2583 	   0xA320   CB_COLOR0_CMASK_BASE_EXT
2584 	   0xA321   CB_COLOR0_FMASK
2585 	   0xA322   CB_COLOR0_FMASK_BASE_EXT
2586 	   0xA323   CB_COLOR0_CLEAR_WORD0
2587 	   0xA324   CB_COLOR0_CLEAR_WORD1
2588 	   0xA325   CB_COLOR0_DCC_BASE
2589 	   0xA326   CB_COLOR0_DCC_BASE_EXT */
2590 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
2591 	ptr[i++] = 0x318;
2592 	ptr[i++] = dst_addr >> 8;
2593 	ptr[i++] = dst_addr >> 40;
2594 	ptr[i++] = 0x7c01f;
2595 	ptr[i++] = 0;
2596 	ptr[i++] = 0x50438;
2597 	ptr[i++] = 0x10140000;
2598 	i += 9;
2599 
2600 	/* mmCB_MRT0_EPITCH */
2601 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2602 	ptr[i++] = 0x1e8;
2603 	ptr[i++] = 0x1f;
2604 
2605 	/* 0xA32B   CB_COLOR1_BASE */
2606 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2607 	ptr[i++] = 0x32b;
2608 	ptr[i++] = 0;
2609 
2610 	/* 0xA33A   CB_COLOR1_BASE */
2611 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2612 	ptr[i++] = 0x33a;
2613 	ptr[i++] = 0;
2614 
2615 	/* SPI_SHADER_COL_FORMAT */
2616 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2617 	ptr[i++] = 0x1c5;
2618 	ptr[i++] = 9;
2619 
2620 	/* Setup depth buffer */
2621 	/* mmDB_Z_INFO */
2622 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
2623 	ptr[i++] = 0xe;
2624 	i += 2;
2625 
2626 	return i;
2627 }
2628 
2629 static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr)
2630 {
2631 	int i = 0;
2632 	const uint32_t *cached_cmd_ptr;
2633 	uint32_t cached_cmd_size;
2634 
2635 	/* mmPA_SC_TILE_STEERING_OVERRIDE */
2636 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2637 	ptr[i++] = 0xd7;
2638 	ptr[i++] = 0;
2639 
2640 	ptr[i++] = 0xffff1000;
2641 	ptr[i++] = 0xc0021000;
2642 
2643 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2644 	ptr[i++] = 0xd7;
2645 	ptr[i++] = 1;
2646 
2647 	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
2648 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
2649 	ptr[i++] = 0x2fe;
2650 	i += 16;
2651 
2652 	/* mmPA_SC_CENTROID_PRIORITY_0 */
2653 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
2654 	ptr[i++] = 0x2f5;
2655 	i += 2;
2656 
2657 	cached_cmd_ptr = cached_cmd_gfx9;
2658 	cached_cmd_size = sizeof(cached_cmd_gfx9);
2659 
2660 	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
2661 	i += cached_cmd_size/sizeof(uint32_t);
2662 
2663 	return i;
2664 }
2665 
2666 static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
2667 						  int ps_type,
2668 						  uint64_t shader_addr)
2669 {
2670 	int i = 0;
2671 
2672 	/* mmPA_CL_VS_OUT_CNTL */
2673 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2674 	ptr[i++] = 0x207;
2675 	ptr[i++] = 0;
2676 
2677 	/* mmSPI_SHADER_PGM_RSRC3_VS */
2678 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2679 	ptr[i++] = 0x46;
2680 	ptr[i++] = 0xffff;
2681 
2682 	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
2683 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2684 	ptr[i++] = 0x48;
2685 	ptr[i++] = shader_addr >> 8;
2686 	ptr[i++] = shader_addr >> 40;
2687 
2688 	/* mmSPI_SHADER_PGM_RSRC1_VS */
2689 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2690 	ptr[i++] = 0x4a;
2691 	ptr[i++] = 0xc0081;
2692 	/* mmSPI_SHADER_PGM_RSRC2_VS */
2693 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2694 	ptr[i++] = 0x4b;
2695 	ptr[i++] = 0x18;
2696 
2697 	/* mmSPI_VS_OUT_CONFIG */
2698 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2699 	ptr[i++] = 0x1b1;
2700 	ptr[i++] = 2;
2701 
2702 	/* mmSPI_SHADER_POS_FORMAT */
2703 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2704 	ptr[i++] = 0x1c3;
2705 	ptr[i++] = 4;
2706 
2707 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
2708 	ptr[i++] = 0x4c;
2709 	i += 2;
2710 	ptr[i++] = 0x42000000;
2711 	ptr[i++] = 0x42000000;
2712 
2713 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
2714 	ptr[i++] = 0x50;
2715 	i += 2;
2716 	if (ps_type == PS_CONST) {
2717 		i += 2;
2718 	} else if (ps_type == PS_TEX) {
2719 		ptr[i++] = 0x3f800000;
2720 		ptr[i++] = 0x3f800000;
2721 	}
2722 
2723 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
2724 	ptr[i++] = 0x54;
2725 	i += 4;
2726 
2727 	return i;
2728 }
2729 
2730 static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
2731 				   int ps_type,
2732 				   uint64_t shader_addr)
2733 {
2734 	int i, j;
2735 	const uint32_t *sh_registers;
2736 	const uint32_t *context_registers;
2737 	uint32_t num_sh_reg, num_context_reg;
2738 
2739 	if (ps_type == PS_CONST) {
2740 		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
2741 		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
2742 		num_sh_reg = ps_num_sh_registers_gfx9;
2743 		num_context_reg = ps_num_context_registers_gfx9;
2744 	} else if (ps_type == PS_TEX) {
2745 		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
2746 		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
2747 		num_sh_reg = ps_num_sh_registers_gfx9;
2748 		num_context_reg = ps_num_context_registers_gfx9;
2749 	}
2750 
2751 	i = 0;
2752 
2753 	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
2754 	   0x2c08   SPI_SHADER_PGM_LO_PS
2755 	   0x2c09   SPI_SHADER_PGM_HI_PS */
2756 	shader_addr += 256 * 9;
2757 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2758 	ptr[i++] = 0x7;
2759 	ptr[i++] = 0xffff;
2760 	ptr[i++] = shader_addr >> 8;
2761 	ptr[i++] = shader_addr >> 40;
2762 
2763 	for (j = 0; j < num_sh_reg; j++) {
2764 		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2765 		ptr[i++] = sh_registers[j * 2] - 0x2c00;
2766 		ptr[i++] = sh_registers[j * 2 + 1];
2767 	}
2768 
2769 	for (j = 0; j < num_context_reg; j++) {
2770 		if (context_registers[j * 2] != 0xA1C5) {
2771 			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2772 			ptr[i++] = context_registers[j * 2] - 0xa000;
2773 			ptr[i++] = context_registers[j * 2 + 1];
2774 		}
2775 
2776 		if (context_registers[j * 2] == 0xA1B4) {
2777 			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2778 			ptr[i++] = 0x1b3;
2779 			ptr[i++] = 2;
2780 		}
2781 	}
2782 
2783 	return i;
2784 }
2785 
2786 static int amdgpu_draw_draw(uint32_t *ptr)
2787 {
2788 	int i = 0;
2789 
2790 	/* mmIA_MULTI_VGT_PARAM */
2791 	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2792 	ptr[i++] = 0x40000258;
2793 	ptr[i++] = 0xd00ff;
2794 
2795 	/* mmVGT_PRIMITIVE_TYPE */
2796 	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2797 	ptr[i++] = 0x10000242;
2798 	ptr[i++] = 0x11;
2799 
2800 	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
2801 	ptr[i++] = 3;
2802 	ptr[i++] = 2;
2803 
2804 	return i;
2805 }
2806 
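/* Memset-via-draw: renders a rectangle with the constant-color pixel
 * shader, passing 0x33333333 through shader user data, with CB_COLOR0
 * pointing at a 16 KiB VRAM target; the result is verified byte by byte. */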
2807 void amdgpu_memset_draw(amdgpu_device_handle device_handle,
2808 			amdgpu_bo_handle bo_shader_ps,
2809 			amdgpu_bo_handle bo_shader_vs,
2810 			uint64_t mc_address_shader_ps,
2811 			uint64_t mc_address_shader_vs,
2812 			uint32_t ring_id)
2813 {
2814 	amdgpu_context_handle context_handle;
2815 	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
2816 	volatile unsigned char *ptr_dst;
2817 	uint32_t *ptr_cmd;
2818 	uint64_t mc_address_dst, mc_address_cmd;
2819 	amdgpu_va_handle va_dst, va_cmd;
2820 	int i, r;
2821 	int bo_dst_size = 16384;
2822 	int bo_cmd_size = 4096;
2823 	struct amdgpu_cs_request ibs_request = {0};
2824 	struct amdgpu_cs_ib_info ib_info = {0};
2825 	struct amdgpu_cs_fence fence_status = {0};
2826 	uint32_t expired;
2827 	amdgpu_bo_list_handle bo_list;
2828 
2829 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2830 	CU_ASSERT_EQUAL(r, 0);
2831 
2832 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2833 					AMDGPU_GEM_DOMAIN_GTT, 0,
2834 					&bo_cmd, (void **)&ptr_cmd,
2835 					&mc_address_cmd, &va_cmd);
2836 	CU_ASSERT_EQUAL(r, 0);
2837 	memset(ptr_cmd, 0, bo_cmd_size);
2838 
2839 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2840 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2841 					&bo_dst, (void **)&ptr_dst,
2842 					&mc_address_dst, &va_dst);
2843 	CU_ASSERT_EQUAL(r, 0);
2844 
2845 	i = 0;
2846 	i += amdgpu_draw_init(ptr_cmd + i);
2847 
2848 	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst);
2849 
2850 	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i);
2851 
2852 	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs);
2853 
2854 	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
2855 
2856 	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
2857 	ptr_cmd[i++] = 0xc;
2858 	ptr_cmd[i++] = 0x33333333;
2859 	ptr_cmd[i++] = 0x33333333;
2860 	ptr_cmd[i++] = 0x33333333;
2861 	ptr_cmd[i++] = 0x33333333;
2862 
2863 	i += amdgpu_draw_draw(ptr_cmd + i);
2864 
2865 	while (i & 7)
2866 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2867 
2868 	resources[0] = bo_dst;
2869 	resources[1] = bo_shader_ps;
2870 	resources[2] = bo_shader_vs;
2871 	resources[3] = bo_cmd;
2872 	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2873 	CU_ASSERT_EQUAL(r, 0);
2874 
2875 	ib_info.ib_mc_address = mc_address_cmd;
2876 	ib_info.size = i;
2877 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2878 	ibs_request.ring = ring_id;
2879 	ibs_request.resources = bo_list;
2880 	ibs_request.number_of_ibs = 1;
2881 	ibs_request.ibs = &ib_info;
2882 	ibs_request.fence_info.handle = NULL;
2883 
2884 	/* submit CS */
2885 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2886 	CU_ASSERT_EQUAL(r, 0);
2887 
2888 	r = amdgpu_bo_list_destroy(bo_list);
2889 	CU_ASSERT_EQUAL(r, 0);
2890 
2891 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2892 	fence_status.ip_instance = 0;
2893 	fence_status.ring = ring_id;
2894 	fence_status.context = context_handle;
2895 	fence_status.fence = ibs_request.seq_no;
2896 
2897 	/* wait for IB accomplished */
2898 	r = amdgpu_cs_query_fence_status(&fence_status,
2899 					 AMDGPU_TIMEOUT_INFINITE,
2900 					 0, &expired);
2901 	CU_ASSERT_EQUAL(r, 0);
2902 	CU_ASSERT_EQUAL(expired, true);
2903 
2904 	/* verify the memset result matches the expected pattern */
2905 	i = 0;
2906 	while(i < bo_dst_size) {
2907 		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
2908 	}
2909 
2910 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2911 	CU_ASSERT_EQUAL(r, 0);
2912 
2913 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2914 	CU_ASSERT_EQUAL(r, 0);
2915 
2916 	r = amdgpu_cs_ctx_free(context_handle);
2917 	CU_ASSERT_EQUAL(r, 0);
2918 }
2919 
2920 static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
2921 				    uint32_t ring)
2922 {
2923 	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
2924 	void *ptr_shader_ps;
2925 	void *ptr_shader_vs;
2926 	uint64_t mc_address_shader_ps, mc_address_shader_vs;
2927 	amdgpu_va_handle va_shader_ps, va_shader_vs;
2928 	int r;
2929 	int bo_shader_size = 4096;
2930 
2931 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2932 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2933 					&bo_shader_ps, &ptr_shader_ps,
2934 					&mc_address_shader_ps, &va_shader_ps);
2935 	CU_ASSERT_EQUAL(r, 0);
2936 	memset(ptr_shader_ps, 0, bo_shader_size);
2937 
2938 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2939 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2940 					&bo_shader_vs, &ptr_shader_vs,
2941 					&mc_address_shader_vs, &va_shader_vs);
2942 	CU_ASSERT_EQUAL(r, 0);
2943 	memset(ptr_shader_vs, 0, bo_shader_size);
2944 
2945 	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
2946 	CU_ASSERT_EQUAL(r, 0);
2947 
2948 	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
2949 	CU_ASSERT_EQUAL(r, 0);
2950 
2951 	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
2952 			mc_address_shader_ps, mc_address_shader_vs, ring);
2953 
2954 	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
2955 	CU_ASSERT_EQUAL(r, 0);
2956 
2957 	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
2958 	CU_ASSERT_EQUAL(r, 0);
2959 }
2960 
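/* Memcpy-via-draw: renders a textured rectangle that samples the
 * 0x55-filled source buffer (bound through eight SGPRs) into the
 * destination color buffer, then compares the two buffers. */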
2961 static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
2962 			       amdgpu_bo_handle bo_shader_ps,
2963 			       amdgpu_bo_handle bo_shader_vs,
2964 			       uint64_t mc_address_shader_ps,
2965 			       uint64_t mc_address_shader_vs,
2966 			       uint32_t ring)
2967 {
2968 	amdgpu_context_handle context_handle;
2969 	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
2970 	volatile unsigned char *ptr_dst;
2971 	unsigned char *ptr_src;
2972 	uint32_t *ptr_cmd;
2973 	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
2974 	amdgpu_va_handle va_dst, va_src, va_cmd;
2975 	int i, r;
2976 	int bo_size = 16384;
2977 	int bo_cmd_size = 4096;
2978 	struct amdgpu_cs_request ibs_request = {0};
2979 	struct amdgpu_cs_ib_info ib_info= {0};
2980 	uint32_t expired;
2981 	amdgpu_bo_list_handle bo_list;
2982 	struct amdgpu_cs_fence fence_status = {0};
2983 
2984 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2985 	CU_ASSERT_EQUAL(r, 0);
2986 
2987 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2988 				    AMDGPU_GEM_DOMAIN_GTT, 0,
2989 				    &bo_cmd, (void **)&ptr_cmd,
2990 				    &mc_address_cmd, &va_cmd);
2991 	CU_ASSERT_EQUAL(r, 0);
2992 	memset(ptr_cmd, 0, bo_cmd_size);
2993 
2994 	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
2995 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2996 					&bo_src, (void **)&ptr_src,
2997 					&mc_address_src, &va_src);
2998 	CU_ASSERT_EQUAL(r, 0);
2999 
3000 	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3001 					AMDGPU_GEM_DOMAIN_VRAM, 0,
3002 					&bo_dst, (void **)&ptr_dst,
3003 					&mc_address_dst, &va_dst);
3004 	CU_ASSERT_EQUAL(r, 0);
3005 
3006 	memset(ptr_src, 0x55, bo_size);
3007 
3008 	i = 0;
3009 	i += amdgpu_draw_init(ptr_cmd + i);
3010 
3011 	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst);
3012 
3013 	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i);
3014 
3015 	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs);
3016 
3017 	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
3018 
3019 	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
3020 	ptr_cmd[i++] = 0xc;
3021 	ptr_cmd[i++] = mc_address_src >> 8;
3022 	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
3023 	ptr_cmd[i++] = 0x7c01f;
3024 	ptr_cmd[i++] = 0x90500fac;
3025 	ptr_cmd[i++] = 0x3e000;
3026 	i += 3;
3027 
3028 	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3029 	ptr_cmd[i++] = 0x14;
3030 	ptr_cmd[i++] = 0x92;
3031 	i += 3;
3032 
3033 	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3034 	ptr_cmd[i++] = 0x191;
3035 	ptr_cmd[i++] = 0;
3036 
3037 	i += amdgpu_draw_draw(ptr_cmd + i);
3038 
3039 	while (i & 7)
3040 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3041 
3042 	resources[0] = bo_dst;
3043 	resources[1] = bo_src;
3044 	resources[2] = bo_shader_ps;
3045 	resources[3] = bo_shader_vs;
3046 	resources[4] = bo_cmd;
3047 	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
3048 	CU_ASSERT_EQUAL(r, 0);
3049 
3050 	ib_info.ib_mc_address = mc_address_cmd;
3051 	ib_info.size = i;
3052 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3053 	ibs_request.ring = ring;
3054 	ibs_request.resources = bo_list;
3055 	ibs_request.number_of_ibs = 1;
3056 	ibs_request.ibs = &ib_info;
3057 	ibs_request.fence_info.handle = NULL;
3058 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3059 	CU_ASSERT_EQUAL(r, 0);
3060 
3061 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
3062 	fence_status.ip_instance = 0;
3063 	fence_status.ring = ring;
3064 	fence_status.context = context_handle;
3065 	fence_status.fence = ibs_request.seq_no;
3066 
3067 	/* wait for IB accomplished */
3068 	r = amdgpu_cs_query_fence_status(&fence_status,
3069 					 AMDGPU_TIMEOUT_INFINITE,
3070 					 0, &expired);
3071 	CU_ASSERT_EQUAL(r, 0);
3072 	CU_ASSERT_EQUAL(expired, true);
3073 
3074 	/* verify the memcpy result matches the source data */
3075 	i = 0;
3076 	while(i < bo_size) {
3077 		CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
3078 		i++;
3079 	}
3080 
3081 	r = amdgpu_bo_list_destroy(bo_list);
3082 	CU_ASSERT_EQUAL(r, 0);
3083 
3084 	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
3085 	CU_ASSERT_EQUAL(r, 0);
3086 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
3087 	CU_ASSERT_EQUAL(r, 0);
3088 
3089 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3090 	CU_ASSERT_EQUAL(r, 0);
3091 
3092 	r = amdgpu_cs_ctx_free(context_handle);
3093 	CU_ASSERT_EQUAL(r, 0);
3094 }
3095 
3096 static void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring)
3097 {
3098 	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3099 	void *ptr_shader_ps;
3100 	void *ptr_shader_vs;
3101 	uint64_t mc_address_shader_ps, mc_address_shader_vs;
3102 	amdgpu_va_handle va_shader_ps, va_shader_vs;
3103 	int bo_shader_size = 4096;
3104 	int r;
3105 
3106 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3107 					AMDGPU_GEM_DOMAIN_VRAM, 0,
3108 					&bo_shader_ps, &ptr_shader_ps,
3109 					&mc_address_shader_ps, &va_shader_ps);
3110 	CU_ASSERT_EQUAL(r, 0);
3111 	memset(ptr_shader_ps, 0, bo_shader_size);
3112 
3113 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3114 					AMDGPU_GEM_DOMAIN_VRAM, 0,
3115 					&bo_shader_vs, &ptr_shader_vs,
3116 					&mc_address_shader_vs, &va_shader_vs);
3117 	CU_ASSERT_EQUAL(r, 0);
3118 	memset(ptr_shader_vs, 0, bo_shader_size);
3119 
3120 	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_TEX);
3121 	CU_ASSERT_EQUAL(r, 0);
3122 
3123 	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3124 	CU_ASSERT_EQUAL(r, 0);
3125 
3126 	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
3127 			mc_address_shader_ps, mc_address_shader_vs, ring);
3128 
3129 	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3130 	CU_ASSERT_EQUAL(r, 0);
3131 
3132 	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3133 	CU_ASSERT_EQUAL(r, 0);
3134 }
3135 
3136 static void amdgpu_draw_test(void)
3137 {
3138 	int r;
3139 	struct drm_amdgpu_info_hw_ip info;
3140 	uint32_t ring_id;
3141 
3142 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
3143 	CU_ASSERT_EQUAL(r, 0);
3144 	if (!info.available_rings)
3145 		printf("SKIP ... as there's no graphics ring\n");
3146 
3147 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3148 		amdgpu_memset_draw_test(device_handle, ring_id);
3149 		amdgpu_memcpy_draw_test(device_handle, ring_id);
3150 	}
3151 }
3152 
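/* Forces a GPU reset by reading the amdgpu_gpu_recover debugfs entry of
 * the DRM minor under test, checks that an existing context reports
 * AMDGPU_CTX_UNKNOWN_RESET, and re-runs the dispatch tests to confirm the
 * GPU came back. */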
3153 static void amdgpu_gpu_reset_test(void)
3154 {
3155 	int r;
3156 	char debugfs_path[256], tmp[10];
3157 	int fd;
3158 	struct stat sbuf;
3159 	amdgpu_context_handle context_handle;
3160 	uint32_t hang_state, hangs;
3161 
3162 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3163 	CU_ASSERT_EQUAL(r, 0);
3164 
3165 	r = fstat(drm_amdgpu[0], &sbuf);
3166 	CU_ASSERT_EQUAL(r, 0);
3167 
3168 	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
3169 	fd = open(debugfs_path, O_RDONLY);
3170 	CU_ASSERT(fd >= 0);
3171 
3172 	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
3173 	CU_ASSERT(r > 0);
3174 
3175 	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3176 	CU_ASSERT_EQUAL(r, 0);
3177 	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3178 
3179 	close(fd);
3180 	r = amdgpu_cs_ctx_free(context_handle);
3181 	CU_ASSERT_EQUAL(r, 0);
3182 
3183 	amdgpu_compute_dispatch_test();
3184 	amdgpu_gfx_dispatch_test();
3185 }
3186