/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include "util_math.h"
static  amdgpu_device_handle device_handle;
static  uint32_t  major_version;
static  uint32_t  minor_version;
static  uint32_t  family_id;
static  uint32_t  chip_id;
static  uint32_t  chip_rev;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);
static void amdgpu_stable_pstate_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test",  amdgpu_query_info_test },
	{ "Userptr Test",  amdgpu_userptr_test },
	{ "bo eviction Test",  amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test",  amdgpu_semaphore_test },
	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
	{ "Draw Test",  amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	{ "Stable pstate Test", amdgpu_stable_pstate_test },
	CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0

#define	SDMA_OPCODE_ATOMIC				  10
#		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
		/* 0 - single_pass_atomic.
		 * 1 - loop_until_compare_satisfied.
		 */
#		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
		/* 0 - non-TMZ.
		 * 1 - TMZ.
		 */
#		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
		 * same as Packet 3
		 */

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
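
/* Worked example (illustrative): the copy-linear header used by the BO
 * eviction test below, SDMA_PACKET(SDMA_OPCODE_COPY,
 * SDMA_COPY_SUB_OPCODE_LINEAR, 0), encodes to
 * (0 << 16) | (0 << 8) | (1 << 0) = 0x00000001.
 */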

/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))

/* Packet 3 types */
#define	PACKET3_NOP					0x10
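
/* Worked example (illustrative): PACKET3(PACKET3_NOP, 14) encodes to
 * (3 << 30) | (0x10 << 8) | (14 << 16) = 0xc00e1000. By the usual PM4
 * type-3 convention the count field means n + 1 payload dwords follow
 * the header, so this NOP pads out a 16-dword IB (see
 * amdgpu_command_submission_compute_nop below).
 */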

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

#define	PACKET3_ATOMIC_MEM				0x1E
#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
            /* 0 - single_pass_atomic.
             * 1 - loop_until_compare_satisfied.
             */
#define     ATOMIC_MEM_CACHEPOLICY(x)           ((x) << 25)
            /* 0 - lru.
             * 1 - stream.
             */
#define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
            /* 0 - micro_engine.
             */

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
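
/* Illustrative encoding (a sketch; this packet is not emitted anywhere
 * in this file): writing an inline data dword to a GPU VA would look
 * roughly like
 *   PACKET3(PACKET3_DMA_DATA, 5)             (header, 6 payload dwords)
 *   PACKET3_DMA_DATA_SRC_SEL(2)              (CONTROL: source is inline DATA)
 *   <data dword>                             (SRC_ADDR_LO doubles as DATA)
 *   0                                        (SRC_ADDR_HI)
 *   <dst VA low and high 32 bits>            (DST_ADDR_LO, DST_ADDR_HI)
 *   PACKET3_DMA_DATA_DIS_WC | <byte count>   (COMMAND | BYTE_COUNT)
 */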

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)


#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

#define	PACKET3_DISPATCH_DIRECT				0x15
#define PACKET3_EVENT_WRITE				0x46
#define PACKET3_ACQUIRE_MEM				0x58
#define PACKET3_SET_CONTEXT_REG				0x69
#define PACKET3_SET_UCONFIG_REG				0x79
#define PACKET3_DRAW_INDEX_AUTO				0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07


#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
		      ((num & 0x0000ff00) << 8) | \
		      ((num & 0x00ff0000) >> 8) | \
		      ((num & 0x000000ff) << 24))
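
/* Worked example: SWAP_32(0x11223344) == 0x44332211, a full 32-bit byte
 * swap; it is applied to every dword of shader_bin below.
 */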

/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

static  uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024

enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};

static const uint32_t bufferclear_cs_shader_gfx9[] = {
    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
    0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
    0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
    0xbf810000
};

static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

static const uint32_t buffercopy_cs_shader_gfx9[] = {
    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
    0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
    0xe01c2000, 0x80010200, 0xbf810000
};

static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};

static const uint32_t ps_const_shader_gfx9[] = {
    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
    0xC4001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
    }
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
    0x00000004
};

static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0040}, //{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t ps_tex_shader_gfx9[] = {
    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
    0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
    0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
    }
};

static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0081}, //{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

static const uint32_t ps_tex_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
    0xC400020F, 0x05060403, 0xBF810000
};

static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

unsigned int memcpy_ps_hang[] = {
        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
        0xF800180F, 0x03020100, 0xBF810000
};
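/* Dword counts describing a test shader: a fixed header, a body that
 * the hang-slow helpers replicate to stretch execution time, and a
 * terminating foot (e.g. 4 + 3 + 1 dwords for the 8-dword compute
 * shaders below).
 */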
struct amdgpu_test_shader {
	uint32_t *shader;
	uint32_t header_length;
	uint32_t body_length;
	uint32_t foot_length;
};

unsigned int memcpy_cs_hang_slow_ai_codes[] = {
    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
        memcpy_cs_hang_slow_ai_codes,
        4,
        3,
        1
};

unsigned int memcpy_cs_hang_slow_rv_codes[] = {
    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
        memcpy_cs_hang_slow_rv_codes,
        4,
        3,
        1
};

unsigned int memcpy_ps_hang_slow_ai_codes[] = {
        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
        0x03020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
        memcpy_ps_hang_slow_ai_codes,
        7,
        2,
        9
};

int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
			unsigned alignment, unsigned heap, uint64_t alloc_flags,
			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
			uint64_t *mc_address,
			amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				   AMDGPU_VM_PAGE_READABLE |
				   AMDGPU_VM_PAGE_WRITEABLE |
				   AMDGPU_VM_PAGE_EXECUTABLE |
				   mapping_flags,
				   AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

 error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

 error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

 error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
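
/* Typical use (illustrative sketch; these exact arguments are not used
 * in this file): map a 4 KiB GTT buffer with default VM mapping flags.
 *
 *	amdgpu_bo_handle bo;
 *	amdgpu_va_handle va;
 *	void *cpu;
 *	uint64_t mc;
 *	int r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
 *					    AMDGPU_GEM_DOMAIN_GTT, 0, 0,
 *					    &bo, &cpu, &mc, &va);
 */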


CU_BOOL suite_basic_tests_enable(void)
{

	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
					     &minor_version, &device_handle))
		return CU_FALSE;


	family_id = device_handle->info.family_id;
	chip_id = device_handle->info.chip_external_rev;
	chip_rev = device_handle->info.chip_rev;

	if (amdgpu_device_deinitialize(device_handle))
		return CU_FALSE;

	/* disable GFX engine basic test cases for ASICs that have no CPG */
	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
		if (amdgpu_set_test_active("Basic Tests",
					"Command submission Test (GFX)",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					"Command submission Test (Multi-Fence)",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					"Sync dependency Test",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());
	}

	return CU_TRUE;
}

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				   &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError:%s. "
				"Hint:Try to run this test program as root.",
				strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run 4 loops to cover all four mapping-flag combinations */
	while(loop1 < 2) {
		while(loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fulfill PM4: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;
	struct amdgpu_gpu_info gpu_info = {0};
	unsigned gc_ip_type;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = gc_ip_type;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release pm4_src, resources, ib_info and
 * ibs_request. This helper submits the command stream described in
 * ibs_request and waits for the IB to complete.
 */
void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
			       amdgpu_context_handle context_handle,
			       unsigned ip_type, int instance, int pm4_dw,
			       uint32_t *pm4_src, int res_cnt,
			       amdgpu_bo_handle *resources,
			       struct amdgpu_cs_ib_info *ib_info,
			       struct amdgpu_cs_request *ibs_request,
			       bool secure)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy PM4 packet to ring from caller */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;
	if (secure)
		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
			   unsigned ip_type, int instance, int pm4_dw,
			   uint32_t *pm4_src, int res_cnt,
			   amdgpu_bo_handle *resources,
			   struct amdgpu_cs_ib_info *ib_info,
			   struct amdgpu_cs_request *ibs_request)
{
	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
				       ip_type, instance, pm4_dw, pm4_src,
				       res_cnt, resources, ib_info,
				       ibs_request, false);
}

void
amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
							  device, unsigned
							  ip_type, bool secure)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	uint32_t bo_cpu_origin;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; secure && (i < 2); i++)
		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;

	r = amdgpu_cs_ctx_create(device, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fulfill PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR,
							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper_raw(device, context_handle,
						       ip_type, ring_id, i, pm4,
						       1, resources, ib_info,
						       ibs_request, secure);

			/* verify if SDMA test result meets with expected */
			i = 0;
			if (!secure) {
				while(i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
				}
			} else if (ip_type == AMDGPU_HW_IP_GFX) {
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
				/* 32-bit atomic compare-swap with return
				 * (TC_OP_ATOMIC_CMPSWAP_RTN_32):
				 * command: 1 - loop_until_compare_satisfied
				 * cache policy: 0 - lru
				 * engine_sel: 0 - micro_engine
				 */
				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
							ATOMIC_MEM_COMMAND(1) |
							ATOMIC_MEM_CACHEPOLICY(0) |
							ATOMIC_MEM_ENGINESEL(0));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							ip_type, ring_id, i, pm4,
							1, resources, ib_info,
							ibs_request, true);
1536 			} else if (ip_type == AMDGPU_HW_IP_DMA) {
1537 				/* restore the bo_cpu to compare */
1538 				bo_cpu_origin = bo_cpu[0];
1539 				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1540 				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
1541 				 * loop, 1-loop_until_compare_satisfied.
1542 				 * single_pass_atomic, 0-lru
1543 				 */
1544 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1545 							       0,
1546 							       SDMA_ATOMIC_LOOP(1) |
1547 							       SDMA_ATOMIC_TMZ(1) |
1548 							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1549 				pm4[i++] = 0xfffffffc & bo_mc;
1550 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1551 				pm4[i++] = 0x12345678;
1552 				pm4[i++] = 0x0;
1553 				pm4[i++] = 0xdeadbeaf;
1554 				pm4[i++] = 0x0;
1555 				pm4[i++] = 0x100;
1556 				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1557 							ip_type, ring_id, i, pm4,
1558 							1, resources, ib_info,
1559 							ibs_request, true);
1560 				/* DMA's atomic behavir is unlike GFX
1561 				 * If the comparing data is not equal to destination data,
1562 				 * For GFX, loop again till gfx timeout(system hang).
1563 				 * For DMA, loop again till timer expired and then send interrupt.
1564 				 * So testcase can't use interrupt mechanism.
1565 				 * We take another way to verify. When the comparing data is not
1566 				 * equal to destination data, overwrite the source data to the destination
1567 				 * buffer. Otherwise, original destination data unchanged.
1568 				 * So if the bo_cpu data is overwritten, the result is passed.
1569 				 */
1570 				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
1571 
1572 				/* compare again for the case of dest_data != cmp_data */
1573 				i = 0;
1574 				/* restore again, here dest_data should be */
1575 				bo_cpu_origin = bo_cpu[0];
1576 				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1577 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1578 							       0,
1579 							       SDMA_ATOMIC_LOOP(1) |
1580 							       SDMA_ATOMIC_TMZ(1) |
1581 							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1582 				pm4[i++] = 0xfffffffc & bo_mc;
1583 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1584 				pm4[i++] = 0x87654321;
1585 				pm4[i++] = 0x0;
1586 				pm4[i++] = 0xdeadbeaf;
1587 				pm4[i++] = 0x0;
1588 				pm4[i++] = 0x100;
1589 				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1590 							ip_type, ring_id, i, pm4,
1591 							1, resources, ib_info,
1592 							ibs_request, true);
1593 				/* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/
1594 				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
1595 			}
1596 
1597 			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1598 						     sdma_write_length * sizeof(uint32_t));
1599 			CU_ASSERT_EQUAL(r, 0);
1600 			loop++;
1601 		}
1602 	}
1603 	/* clean resources */
1604 	free(resources);
1605 	free(ibs_request);
1606 	free(ib_info);
1607 	free(pm4);
1608 
1609 	/* end of test */
1610 	r = amdgpu_cs_ctx_free(context_handle);
1611 	CU_ASSERT_EQUAL(r, 0);
1612 }
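
/*
 * Illustrative usage sketch (hypothetical, not part of the test tables):
 * the secure variant of the helper can also be driven directly for
 * TMZ-protected submissions on hardware that supports them, e.g.:
 *
 *	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
 *								  AMDGPU_HW_IP_DMA,
 *								  true);
 */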

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
								  ip_type,
								  false);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

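			/* Rough packet layouts used below: the non-SI SDMA
			 * constant-fill packet is header (op CONSTANT_FILL,
			 * extra-size 2 for a dword-wide pattern), destination
			 * address low/high, the 32-bit fill pattern, then the
			 * byte count (count - 1 on AMDGPU_FAMILY_AI and newer).
			 * The SI encoding instead carries the dword count in
			 * the header and the high address bits in the trailing
			 * dword.
			 */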
			/* fill the PM4 stream: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the SDMA test result matches what we wrote */
			i = 0;
			while (i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
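
/*
 * The same helper also covers the CP path. A hypothetical GFX variant would
 * simply be
 *
 *	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
 *
 * which exercises PACKET3_DMA_DATA with SRC_SEL(2) (fill from the data dword)
 * instead of the SDMA constant-fill packet.
 */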

static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(2, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop1 = 0;
		/* loop over both flag values for each buffer to cover all
		 * four mapping combinations */
		while (loop1 < 2) {
			loop2 = 0;
			while (loop2 < 2) {
				/* allocate UC bo1 for SDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop1], &bo1,
							    (void**)&bo1_cpu, &bo1_mc,
							    &bo1_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* set bo1 */
				memset((void*)bo1_cpu, 0xaa, sdma_write_length);

				/* allocate UC bo2 for SDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop2], &bo2,
							    (void**)&bo2_cpu, &bo2_mc,
							    &bo2_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* clear bo2 */
				memset((void*)bo2_cpu, 0, sdma_write_length);

				resources[0] = bo1;
				resources[1] = bo2;

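				/* Rough layout of the non-SI SDMA copy-linear
				 * packet built below: header (op COPY, sub-op
				 * LINEAR), byte count (count - 1 on
				 * AMDGPU_FAMILY_AI and newer), a reserved
				 * dword, then source address low/high followed
				 * by destination address low/high. The SI
				 * encoding carries the count in the header and
				 * orders the addresses dst-lo, src-lo, dst-hi,
				 * src-hi.
				 */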
				/* fill the PM4 stream: test DMA copy linear */
				i = j = 0;
				if (ip_type == AMDGPU_HW_IP_DMA) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
									  0, 0, 0,
									  sdma_write_length);
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
					} else {
						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
								       SDMA_COPY_SUB_OPCODE_LINEAR,
								       0);
						if (family_id >= AMDGPU_FAMILY_AI)
							pm4[i++] = sdma_write_length - 1;
						else
							pm4[i++] = sdma_write_length;
						pm4[i++] = 0;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
					}
				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
							   PACKET3_DMA_DATA_SI_CP_SYNC |
							   (0xffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					} else {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
							   PACKET3_DMA_DATA_DST_SEL(0) |
							   PACKET3_DMA_DATA_SRC_SEL(0) |
							   PACKET3_DMA_DATA_CP_SYNC;
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					}
				}

				amdgpu_test_exec_cs_helper(context_handle,
							   ip_type, ring_id,
							   i, pm4,
							   2, resources,
							   ib_info, ibs_request);

				/* verify the SDMA test result matches the source data */
				i = 0;
				while (i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
				}
				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				loop2++;
			}
			loop1++;
		}
	}
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}

static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

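	/*
	 * The two IBs per request exercise the constant-engine handshake:
	 * the CE IB sets the CE/DE counters (the 0xc0008900 header is a
	 * type-3 packet, opcode 0x89) and increments the CE counter
	 * (opcode 0x84), while the DE IB waits on the CE counter
	 * (opcode 0x86) before executing.
	 */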
	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);

	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

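	/*
	 * With wait_all == true this returns only once every fence in the
	 * array has signalled; with wait_all == false it returns as soon as
	 * at least one has. The index of the first signalled fence can be
	 * reported through the last argument, which is passed as NULL here.
	 */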
	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				AMDGPU_TIMEOUT_INFINITE,
				&expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}

static void amdgpu_userptr_test(void)
{
	int i, r, j;
	uint32_t *pm4 = NULL;
	uint64_t bo_mc;
	void *ptr = NULL;
	int pm4_dw = 256;
	int sdma_write_length = 4;
	amdgpu_bo_handle handle;
	amdgpu_context_handle context_handle;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_NOT_EQUAL(ptr, NULL);
	memset(ptr, 0, BUFFER_SIZE);

	r = amdgpu_create_bo_from_user_mem(device_handle,
					   ptr, BUFFER_SIZE, &buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_va_range_alloc(device_handle,
				  amdgpu_gpu_va_range_general,
				  BUFFER_SIZE, 1, 0, &bo_mc,
				  &va_handle, 0);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
	CU_ASSERT_EQUAL(r, 0);

	handle = buf_handle;

	j = i = 0;

	if (family_id == AMDGPU_FAMILY_SI)
		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
				sdma_write_length);
	else
		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	pm4[i++] = 0xffffffff & bo_mc;
	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
	if (family_id >= AMDGPU_FAMILY_AI)
		pm4[i++] = sdma_write_length - 1;
	else if (family_id != AMDGPU_FAMILY_SI)
		pm4[i++] = sdma_write_length;

	while (j++ < sdma_write_length)
		pm4[i++] = 0xdeadbeaf;

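	/*
	 * Forking here exercises copy-on-write on the userptr pages: the
	 * child scribbles on its own copy of the PM4 buffer and exits, which
	 * must not disturb the parent's mapping that the GPU is about to
	 * read from and write to.
	 */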
	if (!fork()) {
		pm4[0] = 0x0;
		exit(0);
	}

	amdgpu_test_exec_cs_helper(context_handle,
				   AMDGPU_HW_IP_DMA, 0,
				   i, pm4,
				   1, &handle,
				   ib_info, ibs_request);
	i = 0;
	while (i < sdma_write_length) {
		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
	}
	free(ibs_request);
	free(ib_info);
	free(pm4);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_va_range_free(va_handle);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_free(buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	free(ptr);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	wait(NULL);
}

static void amdgpu_sync_dependency_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t expired;
	int i, j, r;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	static uint32_t *ptr;
	uint64_t seq_no;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ptr = ib_result_cpu;
	i = 0;

	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));

	/* Dispatch minimal init config and verify it's executed */
	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
	ptr[i++] = 0x80000000;
	ptr[i++] = 0x80000000;

	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
	ptr[i++] = 0x80000000;

	/* Program compute regs */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
	/*
	 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0
	 *                               SGPRS = 1
	 *                               PRIORITY = 0
	 *                               FLOAT_MODE = 192 (0xc0)
	 *                               PRIV = 0
	 *                               DX10_CLAMP = 1
	 *                               DEBUG_MODE = 0
	 *                               IEEE_MODE = 0
	 *                               BULKY = 0
	 *                               CDBG_USER = 0
	 */
	ptr[i++] = 0x002c0040;

	/*
	 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
	 *                               USER_SGPR = 8
	 *                               TRAP_PRESENT = 0
	 *                               TGID_X_EN = 0
	 *                               TGID_Y_EN = 0
	 *                               TGID_Z_EN = 0
	 *                               TG_SIZE_EN = 0
	 *                               TIDIG_COMP_CNT = 0
	 *                               EXCP_EN_MSB = 0
	 *                               LDS_SIZE = 0
	 *                               EXCP_EN = 0
	 */
	ptr[i++] = 0x00000010;

	/*
	 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
	 *                                  WAVESIZE = 0
	 */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0x00000100;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;

	/* Dispatch */
	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */

	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address;
	ib_info.size = i;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);
	seq_no = ibs_request.seq_no;

	/* Prepare second command with dependency on the first */
	j = i;
	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
	ptr[i++] = 99;

	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
	ib_info.size = i - j;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	ibs_request.number_of_dependencies = 1;

	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
	ibs_request.dependencies[0].context = context_handle[1];
	ibs_request.dependencies[0].ip_instance = 0;
	ibs_request.dependencies[0].ring = 0;
	ibs_request.dependencies[0].fence = seq_no;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
			AMDGPU_TIMEOUT_INFINITE, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	/* Expect the second command to wait for shader to complete */
	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_free(context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	free(ibs_request.dependencies);
}

static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
{
	struct amdgpu_test_shader *shader;
	int i, loop = 0x10000;

	switch (family) {
	case AMDGPU_FAMILY_AI:
		shader = &memcpy_cs_hang_slow_ai;
		break;
	case AMDGPU_FAMILY_RV:
		shader = &memcpy_cs_hang_slow_rv;
		break;
	default:
		return -1;
	}

	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));

	for (i = 0; i < loop; i++)
		memcpy(ptr + shader->header_length + shader->body_length * i,
			shader->shader + shader->header_length,
			shader->body_length * sizeof(uint32_t));

	memcpy(ptr + shader->header_length + shader->body_length * loop,
		shader->shader + shader->header_length + shader->body_length,
		shader->foot_length * sizeof(uint32_t));

	return 0;
}
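
/*
 * The resulting program is the shader header, the body repeated 0x10000
 * times, then the footer: a multi-megabyte shader whose sheer length,
 * rather than any invalid instruction, keeps the dispatch running long
 * enough to trip the GPU scheduler's job timeout in the slow-hang tests
 * further below.
 */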

static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
					  int cs_type)
{
	uint32_t shader_size;
	const uint32_t *shader;

	switch (cs_type) {
	case CS_BUFFERCLEAR:
		shader = bufferclear_cs_shader_gfx9;
		shader_size = sizeof(bufferclear_cs_shader_gfx9);
		break;
	case CS_BUFFERCOPY:
		shader = buffercopy_cs_shader_gfx9;
		shader_size = sizeof(buffercopy_cs_shader_gfx9);
		break;
	case CS_HANG:
		shader = memcpy_ps_hang;
		shader_size = sizeof(memcpy_ps_hang);
		break;
	default:
		return -1;
	}

	memcpy(ptr, shader, shader_size);
	return 0;
}

static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
{
	int i = 0;

	/* Write context control and load shadowing register if necessary */
	if (ip_type == AMDGPU_HW_IP_GFX) {
		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
		ptr[i++] = 0x80000000;
		ptr[i++] = 0x80000000;
	}

	/* Issue commands to set default compute state. */
	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
	ptr[i++] = 0x204;
	i += 3;

	/* clear mmCOMPUTE_TMPRING_SIZE */
	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x218;
	ptr[i++] = 0;

	return i;
}
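
/*
 * The SH-register offsets above are relative to PACKET3_SET_SH_REG_START
 * (0x2c00 on gfx9): 0x204 corresponds to mmCOMPUTE_START_X (0x2e04) and
 * 0x218 to mmCOMPUTE_TMPRING_SIZE (0x2e18). A hypothetical helper makes
 * the pattern explicit:
 *
 *	static uint32_t sh_reg_offset(uint32_t reg)
 *	{
 *		return reg - 0x2c00;  // e.g. sh_reg_offset(0x2e04) == 0x204
 *	}
 */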

static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
{
	int i = 0;

	/* Issue commands to set cu mask used in current dispatch */
	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x216;
	ptr[i++] = 0xffffffff;
	ptr[i++] = 0xffffffff;
	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x219;
	ptr[i++] = 0xffffffff;
	ptr[i++] = 0xffffffff;

	return i;
}

static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
{
	int i, j;

	i = 0;

	/* Writes shader state to HW */
	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x20c;
	ptr[i++] = (shader_addr >> 8);
	ptr[i++] = (shader_addr >> 40);
	/* write sh regs */
	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
		/* - Gfx9ShRegBase */
		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
	}

	return i;
}
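
/*
 * Note that the shader address is programmed as addr >> 8 / addr >> 40:
 * mmCOMPUTE_PGM_LO/HI hold a 256-byte-aligned address, so shader BOs must
 * be allocated with at least 256-byte alignment (the tests use 4096).
 */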

static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
					uint32_t ip_type,
					uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader, &ptr_shader,
				    &mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/* Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

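	/*
	 * The four dwords written to SH offset 0x240 (mmCOMPUTE_USER_DATA_0,
	 * i.e. 0x2e40 - 0x2c00) appear to form the buffer descriptor the
	 * shader reads from its user SGPRs: base address low, base address
	 * high ORed with flag bits, size (0x400), and a format/stride dword
	 * (0x74fac).
	 */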
	/* Write constant data */
	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Sets a range of pixel shader constants */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader;
	resources[2] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify the memset test result matches what the shader wrote */
	i = 0;
	while (i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
					uint32_t ip_type,
					uint32_t ring,
					int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t expired, hang_state, hangs;
	enum cs_type cs_type;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader, &ptr_shader,
				    &mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_dst_size);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/* Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the texture resource constants data to the SGPRs */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_src;
	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_shader;
	resources[1] = bo_src;
	resources[2] = bo_dst;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify the memcpy test result matches the source data */
		i = 0;
		while (i < bo_dst_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_compute_dispatch_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no compute ring\n");

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
	}
}

static void amdgpu_gfx_dispatch_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no graphics ring\n");

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
	}
}

void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no ring for ip %d\n", ip_type);

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
	}
}
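
/*
 * The good-hang-good sandwich above is deliberate: the first dispatch
 * proves the ring works, the second triggers a GPU hang and reset, and
 * the third verifies that submissions recover after the reset.
 */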
2808 
amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,uint32_t ip_type,uint32_t ring)2809 static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
2810 						  uint32_t ip_type, uint32_t ring)
2811 {
2812 	amdgpu_context_handle context_handle;
2813 	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2814 	volatile unsigned char *ptr_dst;
2815 	void *ptr_shader;
2816 	unsigned char *ptr_src;
2817 	uint32_t *ptr_cmd;
2818 	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2819 	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2820 	int i, r;
2821 	int bo_dst_size = 0x4000000;
2822 	int bo_shader_size = 0x400000;
2823 	int bo_cmd_size = 4096;
2824 	struct amdgpu_cs_request ibs_request = {0};
2825 	struct amdgpu_cs_ib_info ib_info= {0};
2826 	uint32_t hang_state, hangs, expired;
2827 	struct amdgpu_gpu_info gpu_info = {0};
2828 	amdgpu_bo_list_handle bo_list;
2829 	struct amdgpu_cs_fence fence_status = {0};
2830 
2831 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
2832 	CU_ASSERT_EQUAL(r, 0);
2833 
2834 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2835 	CU_ASSERT_EQUAL(r, 0);
2836 
2837 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2838 				    AMDGPU_GEM_DOMAIN_GTT, 0,
2839 				    &bo_cmd, (void **)&ptr_cmd,
2840 				    &mc_address_cmd, &va_cmd);
2841 	CU_ASSERT_EQUAL(r, 0);
2842 	memset(ptr_cmd, 0, bo_cmd_size);
2843 
2844 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2845 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2846 					&bo_shader, &ptr_shader,
2847 					&mc_address_shader, &va_shader);
2848 	CU_ASSERT_EQUAL(r, 0);
2849 	memset(ptr_shader, 0, bo_shader_size);
2850 
2851 	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
2852 	CU_ASSERT_EQUAL(r, 0);
2853 
2854 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2855 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2856 					&bo_src, (void **)&ptr_src,
2857 					&mc_address_src, &va_src);
2858 	CU_ASSERT_EQUAL(r, 0);
2859 
2860 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2861 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2862 					&bo_dst, (void **)&ptr_dst,
2863 					&mc_address_dst, &va_dst);
2864 	CU_ASSERT_EQUAL(r, 0);
2865 
2866 	memset(ptr_src, 0x55, bo_dst_size);
2867 
2868 	i = 0;
2869 	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2870 
2871 	/*  Issue commands to set cu mask used in current dispatch */
2872 	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2873 
2874 	/* Writes shader state to HW */
2875 	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2876 
2877 	/* Write constant data */
2878 	/* Writes the texture resource constants data to the SGPRs */
2879 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2880 	ptr_cmd[i++] = 0x240;
2881 	ptr_cmd[i++] = mc_address_src;
2882 	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2883 	ptr_cmd[i++] = 0x400000;
2884 	ptr_cmd[i++] = 0x74fac;
2885 
2886 	/* Writes the UAV constant data to the SGPRs. */
2887 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2888 	ptr_cmd[i++] = 0x244;
2889 	ptr_cmd[i++] = mc_address_dst;
2890 	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2891 	ptr_cmd[i++] = 0x400000;
2892 	ptr_cmd[i++] = 0x74fac;
2893 
2894 	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2895 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2896 	ptr_cmd[i++] = 0x215;
2897 	ptr_cmd[i++] = 0;
2898 
2899 	/* dispatch direct command */
2900 	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2901 	ptr_cmd[i++] = 0x10000;
2902 	ptr_cmd[i++] = 1;
2903 	ptr_cmd[i++] = 1;
2904 	ptr_cmd[i++] = 1;
2905 
2906 	while (i & 7)
2907 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2908 
2909 	resources[0] = bo_shader;
2910 	resources[1] = bo_src;
2911 	resources[2] = bo_dst;
2912 	resources[3] = bo_cmd;
2913 	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2914 	CU_ASSERT_EQUAL(r, 0);
2915 
2916 	ib_info.ib_mc_address = mc_address_cmd;
2917 	ib_info.size = i;
2918 	ibs_request.ip_type = ip_type;
2919 	ibs_request.ring = ring;
2920 	ibs_request.resources = bo_list;
2921 	ibs_request.number_of_ibs = 1;
2922 	ibs_request.ibs = &ib_info;
2923 	ibs_request.fence_info.handle = NULL;
2924 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2925 	CU_ASSERT_EQUAL(r, 0);
2926 
2927 	fence_status.ip_type = ip_type;
2928 	fence_status.ip_instance = 0;
2929 	fence_status.ring = ring;
2930 	fence_status.context = context_handle;
2931 	fence_status.fence = ibs_request.seq_no;
2932 
2933 	/* wait for IB accomplished */
2934 	r = amdgpu_cs_query_fence_status(&fence_status,
2935 					 AMDGPU_TIMEOUT_INFINITE,
2936 					 0, &expired);
2937 
2938 	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2939 	CU_ASSERT_EQUAL(r, 0);
2940 	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2941 
2942 	r = amdgpu_bo_list_destroy(bo_list);
2943 	CU_ASSERT_EQUAL(r, 0);
2944 
2945 	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2946 	CU_ASSERT_EQUAL(r, 0);
2947 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2948 	CU_ASSERT_EQUAL(r, 0);
2949 
2950 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2951 	CU_ASSERT_EQUAL(r, 0);
2952 
2953 	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2954 	CU_ASSERT_EQUAL(r, 0);
2955 
2956 	r = amdgpu_cs_ctx_free(context_handle);
2957 	CU_ASSERT_EQUAL(r, 0);
2958 }
2959 
amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle,uint32_t ip_type)2960 void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2961 {
2962 	int r;
2963 	struct drm_amdgpu_info_hw_ip info;
2964 	uint32_t ring_id;
2965 
2966 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2967 	CU_ASSERT_EQUAL(r, 0);
2968 	if (!info.available_rings)
2969 		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2970 
2971 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2972 		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2973 		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
2974 		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2975 	}
2976 }
2977 
amdgpu_draw_load_ps_shader_hang_slow(uint32_t * ptr,int family)2978 static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
2979 {
2980 	struct amdgpu_test_shader *shader;
2981 	int i, loop = 0x40000;
2982 
2983 	switch (family) {
2984 		case AMDGPU_FAMILY_AI:
2985 		case AMDGPU_FAMILY_RV:
2986 			shader = &memcpy_ps_hang_slow_ai;
2987 			break;
2988 		default:
2989 			return -1;
2990 			break;
2991 	}
2992 
2993 	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2994 
2995 	for (i = 0; i < loop; i++)
2996 		memcpy(ptr + shader->header_length + shader->body_length * i,
2997 			shader->shader + shader->header_length,
2998 			shader->body_length * sizeof(uint32_t));
2999 
3000 	memcpy(ptr + shader->header_length + shader->body_length * loop,
3001 		shader->shader + shader->header_length + shader->body_length,
3002 		shader->foot_length * sizeof(uint32_t));
3003 
3004 	return 0;
3005 }
3006 
amdgpu_draw_load_ps_shader(uint8_t * ptr,int ps_type)3007 static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
3008 {
3009 	int i;
3010 	uint32_t shader_offset= 256;
3011 	uint32_t mem_offset, patch_code_offset;
3012 	uint32_t shader_size, patchinfo_code_size;
3013 	const uint32_t *shader;
3014 	const uint32_t *patchinfo_code;
3015 	const uint32_t *patchcode_offset;
3016 
3017 	switch (ps_type) {
3018 		case PS_CONST:
3019 			shader = ps_const_shader_gfx9;
3020 			shader_size = sizeof(ps_const_shader_gfx9);
3021 			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
3022 			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
3023 			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
3024 			break;
3025 		case PS_TEX:
3026 			shader = ps_tex_shader_gfx9;
3027 			shader_size = sizeof(ps_tex_shader_gfx9);
3028 			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
3029 			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
3030 			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
3031 			break;
3032 		case PS_HANG:
3033 			shader = memcpy_ps_hang;
3034 			shader_size = sizeof(memcpy_ps_hang);
3035 
3036 			memcpy(ptr, shader, shader_size);
3037 			return 0;
3038 		default:
3039 			return -1;
3040 			break;
3041 	}
3042 
3043 	/* write main shader program */
3044 	for (i = 0 ; i < 10; i++) {
3045 		mem_offset = i * shader_offset;
3046 		memcpy(ptr + mem_offset, shader, shader_size);
3047 	}
3048 
3049 	/* overwrite patch codes */
3050 	for (i = 0 ; i < 10; i++) {
3051 		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
3052 		patch_code_offset = i * patchinfo_code_size;
3053 		memcpy(ptr + mem_offset,
3054 			patchinfo_code + patch_code_offset,
3055 			patchinfo_code_size * sizeof(uint32_t));
3056 	}
3057 
3058 	return 0;
3059 }
3060 
3061 /* load RectPosTexFast_VS */
amdgpu_draw_load_vs_shader(uint8_t * ptr)3062 static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
3063 {
3064 	const uint32_t *shader;
3065 	uint32_t shader_size;
3066 
3067 	shader = vs_RectPosTexFast_shader_gfx9;
3068 	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
3069 
3070 	memcpy(ptr, shader, shader_size);
3071 
3072 	return 0;
3073 }
3074 
amdgpu_draw_init(uint32_t * ptr)3075 static int amdgpu_draw_init(uint32_t *ptr)
3076 {
3077 	int i = 0;
3078 	const uint32_t *preamblecache_ptr;
3079 	uint32_t preamblecache_size;
3080 
3081 	/* Write context control and load shadowing register if necessary */
3082 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
3083 	ptr[i++] = 0x80000000;
3084 	ptr[i++] = 0x80000000;
3085 
3086 	preamblecache_ptr = preamblecache_gfx9;
3087 	preamblecache_size = sizeof(preamblecache_gfx9);
3088 
3089 	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
3090 	return i + preamblecache_size/sizeof(uint32_t);
3091 }
3092 
amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t * ptr,uint64_t dst_addr,int hang_slow)3093 static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
3094 							 uint64_t dst_addr,
3095 							 int hang_slow)
3096 {
3097 	int i = 0;
3098 
3099 	/* setup color buffer */
3100 	/* offset   reg
3101 	   0xA318   CB_COLOR0_BASE
3102 	   0xA319   CB_COLOR0_BASE_EXT
3103 	   0xA31A   CB_COLOR0_ATTRIB2
3104 	   0xA31B   CB_COLOR0_VIEW
3105 	   0xA31C   CB_COLOR0_INFO
3106 	   0xA31D   CB_COLOR0_ATTRIB
3107 	   0xA31E   CB_COLOR0_DCC_CONTROL
3108 	   0xA31F   CB_COLOR0_CMASK
3109 	   0xA320   CB_COLOR0_CMASK_BASE_EXT
3110 	   0xA321   CB_COLOR0_FMASK
3111 	   0xA322   CB_COLOR0_FMASK_BASE_EXT
3112 	   0xA323   CB_COLOR0_CLEAR_WORD0
3113 	   0xA324   CB_COLOR0_CLEAR_WORD1
3114 	   0xA325   CB_COLOR0_DCC_BASE
3115 	   0xA326   CB_COLOR0_DCC_BASE_EXT */
3116 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
3117 	ptr[i++] = 0x318;
3118 	ptr[i++] = dst_addr >> 8;
3119 	ptr[i++] = dst_addr >> 40;
3120 	ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
3121 	ptr[i++] = 0;
3122 	ptr[i++] = 0x50438;
3123 	ptr[i++] = 0x10140000;
3124 	i += 9;
3125 
3126 	/* mmCB_MRT0_EPITCH */
3127 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3128 	ptr[i++] = 0x1e8;
3129 	ptr[i++] = hang_slow ? 0x7ff : 0x1f;
3130 
3131 	/* 0xA32B   CB_COLOR1_BASE */
3132 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3133 	ptr[i++] = 0x32b;
3134 	ptr[i++] = 0;
3135 
3136 	/* 0xA33A   CB_COLOR1_BASE */
3137 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3138 	ptr[i++] = 0x33a;
3139 	ptr[i++] = 0;
3140 
3141 	/* SPI_SHADER_COL_FORMAT */
3142 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3143 	ptr[i++] = 0x1c5;
3144 	ptr[i++] = 9;
3145 
3146 	/* Setup depth buffer */
3147 	/* mmDB_Z_INFO */
3148 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3149 	ptr[i++] = 0xe;
3150 	i += 2;
3151 
3152 	return i;
3153 }
static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
{
	int i = 0;
	const uint32_t *cached_cmd_ptr;
	uint32_t cached_cmd_size;

	/* mmPA_SC_TILE_STEERING_OVERRIDE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 0;

	ptr[i++] = 0xffff1000;
	ptr[i++] = 0xc0021000;

	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 1;

	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
	ptr[i++] = 0x2fe;
	i += 16;

	/* mmPA_SC_CENTROID_PRIORITY_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x2f5;
	i += 2;

	cached_cmd_ptr = cached_cmd_gfx9;
	cached_cmd_size = sizeof(cached_cmd_gfx9);

	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
	if (hang_slow)
		*(ptr + i + 12) = 0x8000800;
	i += cached_cmd_size/sizeof(uint32_t);

	return i;
}
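
/* Bind the vertex shader: program its GPU address into
 * SPI_SHADER_PGM_LO/HI_VS plus the RSRC1/2 dwords, then write the VS
 * constants for the rect geometry (0x42000000 = 32.0f per axis, bumped
 * to 0x45000000 = 2048.0f for the oversized hang_slow draw).
 */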
static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
						  int ps_type,
						  uint64_t shader_addr,
						  int hang_slow)
{
	int i = 0;

	/* mmPA_CL_VS_OUT_CNTL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x207;
	ptr[i++] = 0;

	/* mmSPI_SHADER_PGM_RSRC3_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x46;
	ptr[i++] = 0xffff;

	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x48;
	ptr[i++] = shader_addr >> 8;
	ptr[i++] = shader_addr >> 40;

	/* mmSPI_SHADER_PGM_RSRC1_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4a;
	ptr[i++] = 0xc0081;
	/* mmSPI_SHADER_PGM_RSRC2_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4b;
	ptr[i++] = 0x18;

	/* mmSPI_VS_OUT_CONFIG */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1b1;
	ptr[i++] = 2;

	/* mmSPI_SHADER_POS_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c3;
	ptr[i++] = 4;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x4c;
	i += 2;
	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x50;
	i += 2;
	if (ps_type == PS_CONST) {
		i += 2;
	} else if (ps_type == PS_TEX) {
		ptr[i++] = 0x3f800000;
		ptr[i++] = 0x3f800000;
	}

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x54;
	i += 4;

	return i;
}
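
/* Bind the pixel shader, located 9 * 256 bytes past shader_addr, and
 * replay the per-variant SH/context register tables.  The entry for
 * SPI_SHADER_COL_FORMAT (0xA1C5) is skipped because it was programmed
 * earlier, and SPI_PS_INPUT_ENA (0xA1B3) is written alongside
 * SPI_PS_INPUT_ADDR (0xA1B4).
 */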
static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
				   int ps_type,
				   uint64_t shader_addr)
{
	int i, j;
	const uint32_t *sh_registers = NULL;
	const uint32_t *context_registers = NULL;
	uint32_t num_sh_reg = 0, num_context_reg = 0;

	if (ps_type == PS_CONST) {
		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
		num_sh_reg = ps_num_sh_registers_gfx9;
		num_context_reg = ps_num_context_registers_gfx9;
	} else if (ps_type == PS_TEX) {
		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
		num_sh_reg = ps_num_sh_registers_gfx9;
		num_context_reg = ps_num_context_registers_gfx9;
	}

	i = 0;

	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
	   0x2c08   SPI_SHADER_PGM_LO_PS
	   0x2c09   SPI_SHADER_PGM_HI_PS */
	shader_addr += 256 * 9;
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
	ptr[i++] = 0x7;
	ptr[i++] = 0xffff;
	ptr[i++] = shader_addr >> 8;
	ptr[i++] = shader_addr >> 40;

	for (j = 0; j < num_sh_reg; j++) {
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
		ptr[i++] = sh_registers[j * 2] - 0x2c00;
		ptr[i++] = sh_registers[j * 2 + 1];
	}

	for (j = 0; j < num_context_reg; j++) {
		if (context_registers[j * 2] != 0xA1C5) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = context_registers[j * 2] - 0xa000;
			ptr[i++] = context_registers[j * 2 + 1];
		}

		if (context_registers[j * 2] == 0xA1B4) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = 0x1b3;
			ptr[i++] = 2;
		}
	}

	return i;
}
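
/* Issue the draw: IA_MULTI_VGT_PARAM, VGT_PRIMITIVE_TYPE = 0x11
 * (rect list), then DRAW_INDEX_AUTO with a vertex count of 3.
 */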
static int amdgpu_draw_draw(uint32_t *ptr)
{
	int i = 0;

	/* mmIA_MULTI_VGT_PARAM */
	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ptr[i++] = 0x40000258;
	ptr[i++] = 0xd00ff;

	/* mmVGT_PRIMITIVE_TYPE */
	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ptr[i++] = 0x10000242;
	ptr[i++] = 0x11;

	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
	ptr[i++] = 3;
	ptr[i++] = 2;

	return i;
}
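
/* Draw-based memset: render a rect into a 16 KiB VRAM buffer with the
 * constant-color pixel shader, whose color constants are set to
 * 0x33333333 below, then verify every byte through the CPU mapping.
 */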
void amdgpu_memset_draw(amdgpu_device_handle device_handle,
			amdgpu_bo_handle bo_shader_ps,
			amdgpu_bo_handle bo_shader_vs,
			uint64_t mc_address_shader_ps,
			uint64_t mc_address_shader_vs,
			uint32_t ring_id)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_cmd;
	amdgpu_va_handle va_dst, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
					AMDGPU_GEM_DOMAIN_GTT, 0,
					&bo_cmd, (void **)&ptr_cmd,
					&mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;

	i += amdgpu_draw_draw(ptr_cmd + i);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader_ps;
	resources[2] = bo_shader_vs;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring_id;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring_id;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify that the memset result matches the expected pattern */
	i = 0;
	while (i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
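
/* Allocate the shader BOs in VRAM, load the constant-color PS and the
 * VS, and run the draw-based memset on the given ring.
 */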
static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
				    uint32_t ring)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int r;
	int bo_shader_size = 4096;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
			mc_address_shader_ps, mc_address_shader_vs, ring);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}
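
/* Draw-based memcpy: bind the 0x55-filled source buffer as a texture
 * via the SET_SH_REG descriptor below and render a rect that samples it
 * into the destination.  When hang is set, the caller has loaded a
 * hanging pixel shader instead, so a context reset is expected rather
 * than a byte-exact copy.
 */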
static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
			       amdgpu_bo_handle bo_shader_ps,
			       amdgpu_bo_handle bo_shader_vs,
			       uint64_t mc_address_shader_ps,
			       uint64_t mc_address_shader_vs,
			       uint32_t ring, int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	int i, r;
	int bo_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x7c01f;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0x3e000;
	i += 3;

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify that the memcpy result matches the source */
		i = 0;
		while (i < bo_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
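
/* Set up the shader BOs for the draw-based memcpy; with hang set, a
 * deliberately hanging pixel shader (PS_HANG) is loaded in place of the
 * texture-sampling one.
 */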
void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
			     int hang)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int bo_shader_size = 4096;
	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
	int r;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
			mc_address_shader_ps, mc_address_shader_vs, ring, hang);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}
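
/* Run the memset and memcpy draw tests on every available GFX ring. */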
static void amdgpu_draw_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no graphics ring\n");

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_draw_test(device_handle, ring_id);
		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
	}
}
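
/* Slow-hang variant of the draw-based memcpy: a 64 MiB copy using the
 * hang_slow command paths and a deliberately slow pixel shader, sized
 * so the job exceeds the GPU timeout and leaves the context in
 * AMDGPU_CTX_UNKNOWN_RESET.
 */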
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	int bo_size = 0x4000000;
	int bo_shader_ps_size = 0x400000;
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs, expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
							mc_address_shader_vs, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x1ffc7ff;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0xffe000;
	i += 3;

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
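
/* Force a GPU reset through the debugfs amdgpu_gpu_recover entry for
 * this device's DRI minor, check that the context reports
 * AMDGPU_CTX_UNKNOWN_RESET, then re-run the dispatch tests to confirm
 * the GPU recovered.
 */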
static void amdgpu_gpu_reset_test(void)
{
	int r;
	char debugfs_path[256], tmp[10];
	int fd;
	struct stat sbuf;
	amdgpu_context_handle context_handle;
	uint32_t hang_state, hangs;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = fstat(drm_amdgpu[0], &sbuf);
	CU_ASSERT_EQUAL(r, 0);

	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
	fd = open(debugfs_path, O_RDONLY);
	CU_ASSERT(fd >= 0);

	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
	CU_ASSERT(r > 0);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	close(fd);
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_compute_dispatch_test();
	amdgpu_gfx_dispatch_test();
}
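
/* Exercise the stable-pstate context ops: the initial query should
 * return NONE; after setting PEAK, a second query should read it back.
 */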
static void amdgpu_stable_pstate_test(void)
{
	int r;
	amdgpu_context_handle context_handle;
	uint32_t current_pstate = 0, new_pstate = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
					0, &current_pstate);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(current_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_SET_STABLE_PSTATE,
					AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
					0, &new_pstate);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}