1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <unistd.h>
27 #include <sys/types.h>
28 #ifdef MAJOR_IN_SYSMACROS
29 #include <sys/sysmacros.h>
30 #endif
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #if HAVE_ALLOCA_H
34 # include <alloca.h>
35 #endif
36 #include <sys/wait.h>
37
38 #include "CUnit/Basic.h"
39
40 #include "amdgpu_test.h"
41 #include "amdgpu_drm.h"
42 #include "amdgpu_internal.h"
43 #include "util_math.h"
44
/* Device handle and identification data shared by every test in this suite.
 * Filled in by suite_basic_tests_init()/suite_basic_tests_enable(). */
static amdgpu_device_handle device_handle;
static uint32_t major_version;	/* libdrm amdgpu interface major version */
static uint32_t minor_version;	/* libdrm amdgpu interface minor version */
static uint32_t family_id;	/* ASIC family id (AMDGPU_FAMILY_*) */
static uint32_t chip_id;	/* external chip revision */
static uint32_t chip_rev;	/* internal chip revision */

/* Test entry points registered in basic_tests[] below. */
static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);
static void amdgpu_stable_pstate_test(void);

/* Helpers shared by the GFX/compute/SDMA submission tests; ip_type selects
 * the hardware IP block (AMDGPU_HW_IP_*) the command stream is sent to. */
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);
76
/* CUnit registration table for the "Basic Tests" suite, terminated by
 * CU_TEST_INFO_NULL.  The name strings must stay in sync with the
 * amdgpu_set_test_active() calls in suite_basic_tests_enable(). */
CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)", amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test },
	{ "Draw Test", amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	{ "Stable pstate Test", amdgpu_stable_pstate_test },
	CU_TEST_INFO_NULL,
};
/* Scratch buffer size used by the submission tests: at least 8 KiB and at
 * least one page.  NOTE(review): MAX2() is a macro, so getpagesize() may be
 * evaluated twice — harmless here. */
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))

/* SDMA packet header "op" field encoding. */
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)

/* SDMA opcodes (post-SI packet format) and their extra header fields. */
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)  ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */

/* Build an SDMA packet header: extra field, sub-opcode, opcode. */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
/* NOTE(review): "WRTIE" misspelling is kept intentionally; renaming the
 * macro would break existing users of this header. */
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#	define SDMA_WRTIE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0

#define	SDMA_OPCODE_ATOMIC				  10
#		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
        /* 0 - single_pass_atomic.
         * 1 - loop_until_compare_satisfied.
         */
#		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
		/* 0 - non-TMZ.
		 * 1 - TMZ.
	         */
#		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
		 * same as Packet 3
		 */

/* NOP encodings differ per engine: CP uses a type-3 NOP, SDMA uses 0. */
#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
130
/* PM4 packet types (bits [31:30] of the header). */
#define PACKET_TYPE0	0
#define PACKET_TYPE1	1
#define PACKET_TYPE2	2
#define PACKET_TYPE3	3

/* Decode helpers for a PM4 packet header dword. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

/* Build a type-3 packet header for opcode 'op' with 'n' payload dwords.
 * The type field is shifted as unsigned: (3 << 30) on a 32-bit signed int
 * would be undefined behavior (shift into the sign bit). */
#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |		\
			 (((op) & 0xFF) << 8) |			\
			 ((n) & 0x3FFF) << 16)
/* Compute (shader type) variant of PACKET3.  Fully parenthesized so the
 * expansion stays intact inside larger expressions (the original
 * "PACKET3(op, n) | (1 << 1)" misparsed under &, ==, ?:, etc.). */
#define PACKET3_COMPUTE(op, n)	(PACKET3(op, n) | (1 << 1))

/* Packet 3 types */
#define	PACKET3_NOP					0x10
157
/* PACKET3_WRITE_DATA header control fields. */
#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

/* PACKET3_ATOMIC_MEM header control fields. */
#define	PACKET3_ATOMIC_MEM				0x1E
#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
            /* 0 - single_pass_atomic.
             * 1 - loop_until_compare_satisfied.
             */
/* NOTE(review): "CACHEPOLICAY" misspelling kept intentionally; renaming
 * the macro would break existing users. */
#define     ATOMIC_MEM_CACHEPOLICAY(x)          ((x) << 25)
            /* 0 - lru.
             * 1 - stream.
             */
#define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
            /* 0 - micro_engine.
	     */
192
/* PACKET3_DMA_DATA: CP-driven DMA transfer.  Payload layout: */
#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
		/* Shifted as unsigned: (1 << 31) on a 32-bit signed int is
		 * undefined behavior (shift into the sign bit). */
#              define PACKET3_DMA_DATA_CP_SYNC     (1u << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
/* SI (gfx6) SDMA packet header: opcode plus per-opcode flag bits and count. */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
		/* Shifted as unsigned: (1 << 31) on a 32-bit signed int is
		 * undefined behavior (shift into the sign bit). */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1u << 31)


#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

#define	PACKET3_DISPATCH_DIRECT				0x15
#define PACKET3_EVENT_WRITE				0x46
#define PACKET3_ACQUIRE_MEM				0x58
#define PACKET3_SET_CONTEXT_REG				0x69
#define PACKET3_SET_UCONFIG_REG				0x79
#define PACKET3_DRAW_INDEX_AUTO				0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07



/* Byte-swap a 32-bit word (endianness conversion).  The argument is fully
 * parenthesized so expressions like SWAP_32(a + b) expand correctly — the
 * original "num & 0xff000000" misparsed for any non-primary argument. */
#define SWAP_32(num) ((((num) & 0xff000000) >> 24) | \
		      (((num) & 0x0000ff00) << 8) | \
		      (((num) & 0x00ff0000) >> 8) | \
		      (((num) & 0x000000ff) << 24))
316
317
318 /* Shader code
319 * void main()
320 {
321
322 float x = some_input;
323 for (unsigned i = 0; i < 1000000; i++)
324 x = sin(x);
325
326 u[0] = 42u;
327 }
328 */
329
/* Machine code for the shader shown in the comment above, stored
 * byte-swapped via SWAP_32 (see macro above).  Used with the CODE_OFFSET/
 * DATA_OFFSET layout below. */
static  uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

/* Byte offsets inside the shader BO: code at 512, data at 1024. */
#define CODE_OFFSET 512
#define DATA_OFFSET 1024
339
/* Compute-shader variants used by the dispatch tests:
 * buffer clear, buffer copy, immediate hang, and slow (long-running) hang. */
enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};
346
/* GFX9 compute shader used by the buffer-clear dispatch test
 * (raw GCN machine code). */
static const uint32_t bufferclear_cs_shader_gfx9[] = {
    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
    0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
    0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
    0xbf810000
};

/* COMPUTE_* register programming for the clear shader; the register each
 * offset corresponds to is given in the trailing comments. */
static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

/* Entry count of bufferclear_cs_shader_registers_gfx9 — keep in sync. */
static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
363
/* GFX9 compute shader used by the buffer-copy dispatch test
 * (raw GCN machine code). */
static const uint32_t buffercopy_cs_shader_gfx9[] = {
    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
    0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
    0xe01c2000, 0x80010200, 0xbf810000
};

/* GFX9 context preamble: a prebuilt stream of SET_*_REG packets that puts
 * the graphics context into a known state before the draw tests run. */
static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900,  0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};
392
/* Pixel-shader variants used by the draw tests:
 * constant color, textured, immediate hang, and slow (long-running) hang. */
enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};
399
/* GFX9 constant-color pixel shader (raw GCN machine code).  The export
 * sequence at the end is patched at run time using the patchinfo tables
 * below to match the bound color-buffer format. */
static const uint32_t ps_const_shader_gfx9[] = {
    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
    0xC4001C0F, 0x00000100, 0xBF810000
};

/* DWORDs per patch entry in ps_const_shader_patchinfo_code_gfx9. */
static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

/* Replacement export sequences, one row per export format variant. */
static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
    }
};

/* DWORD offset within ps_const_shader_gfx9 where patching starts. */
static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
    0x00000004
};

/* Entry count of ps_const_sh_registers_gfx9 — keep in sync. */
static const uint32_t ps_num_sh_registers_gfx9 = 2;

/* SPI_SHADER_* SH register programming for the constant pixel shader. */
static const uint32_t ps_const_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

/* Entry count of ps_const_context_reg_gfx9 — keep in sync. */
static const uint32_t ps_num_context_registers_gfx9 = 7;

/* Context register programming for the constant pixel shader. */
static const uint32_t ps_const_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};
444
/* GFX9 textured pixel shader (raw GCN machine code).  Like the constant
 * shader above, its trailing export sequence is patched at run time. */
static const uint32_t ps_tex_shader_gfx9[] = {
    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
    0x00000100, 0xBF810000
};

/* DWORD offset within ps_tex_shader_gfx9 where patching starts. */
static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
    0x0000000B
};

/* DWORDs per patch entry in ps_tex_shader_patchinfo_code_gfx9. */
static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

/* Replacement export sequences, one row per export format variant. */
static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
    }
};

/* SPI_SHADER_* SH register programming for the textured pixel shader. */
static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

/* Context register programming for the textured pixel shader. */
static const uint32_t ps_tex_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};
487
/* GFX9 vertex shader producing a screen rectangle with texture coordinates
 * (raw GCN machine code), used by the draw tests. */
static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
    0xC400020F, 0x05060403, 0xBF810000
};

/* Prebuilt SET_*_REG command stream cached and replayed before each draw. */
static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

/* Pixel shader that never terminates (first dword is an invalid/looping
 * encoding) — used by the GPU reset/hang tests. */
unsigned int memcpy_ps_hang[] = {
        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
        0xF800180F, 0x03020100, 0xBF810000
};
517
/* A test shader binary split into header/body/footer DWORD counts.
 * NOTE(review): presumably the body section is replicated by the slow-hang
 * tests to build a long-running shader — confirm against the users of the
 * *_hang_slow_* instances below. */
struct amdgpu_test_shader {
	uint32_t *shader;	/* shader machine code */
	uint32_t header_length;	/* DWORDs before the body section */
	uint32_t body_length;	/* DWORDs in the body section */
	uint32_t foot_length;	/* trailing DWORDs */
};
524
/* Compute memcpy shader for AI (gfx9/Vega) used by the slow-hang tests. */
unsigned int memcpy_cs_hang_slow_ai_codes[] = {
    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

/* Header/body/footer split (4/3/1 DWORDs) of the AI compute shader. */
struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
        memcpy_cs_hang_slow_ai_codes,
        4,
        3,
        1
};

/* Compute memcpy shader for RV (Raven) used by the slow-hang tests. */
unsigned int memcpy_cs_hang_slow_rv_codes[] = {
    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

/* Header/body/footer split (4/3/1 DWORDs) of the RV compute shader. */
struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
        memcpy_cs_hang_slow_rv_codes,
        4,
        3,
        1
};

/* Pixel memcpy shader for AI (gfx9/Vega) used by the slow-hang tests. */
unsigned int memcpy_ps_hang_slow_ai_codes[] = {
        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
        0x03020100, 0xbf810000
};

/* Header/body/footer split (7/2/9 DWORDs) of the AI pixel shader. */
struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
        memcpy_ps_hang_slow_ai_codes,
        7,
        2,
        9
};
563
/**
 * Allocate a buffer object, map it into the GPU VA space with the given
 * extra mapping flags, and CPU-map it.
 *
 * On success fills *bo, *cpu, *mc_address and *va_handle and returns 0.
 * On failure releases everything acquired so far and returns the error.
 * Fixes vs. the previous version: the VA range handle is now freed on the
 * error paths (it leaked before), and a failed CPU map no longer calls
 * amdgpu_bo_cpu_unmap() on a mapping that was never established.
 */
int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
				unsigned alignment, unsigned heap, uint64_t alloc_flags,
				uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
				uint64_t *mc_address,
				amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	/* GPU mappings are page granular, hence the ALIGN on the size. */
	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				AMDGPU_VM_PAGE_READABLE |
				AMDGPU_VM_PAGE_WRITEABLE |
				AMDGPU_VM_PAGE_EXECUTABLE |
				mapping_flags,
				AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_range;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_va_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_range:
	amdgpu_va_range_free(handle);

error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
621
622
623
suite_basic_tests_enable(void)624 CU_BOOL suite_basic_tests_enable(void)
625 {
626
627 if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
628 &minor_version, &device_handle))
629 return CU_FALSE;
630
631
632 family_id = device_handle->info.family_id;
633 chip_id = device_handle->info.chip_external_rev;
634 chip_rev = device_handle->info.chip_rev;
635
636 if (amdgpu_device_deinitialize(device_handle))
637 return CU_FALSE;
638
639 /* disable gfx engine basic test cases for some asics have no CPG */
640 if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
641 if (amdgpu_set_test_active("Basic Tests",
642 "Command submission Test (GFX)",
643 CU_FALSE))
644 fprintf(stderr, "test deactivation failed - %s\n",
645 CU_get_error_msg());
646
647 if (amdgpu_set_test_active("Basic Tests",
648 "Command submission Test (Multi-Fence)",
649 CU_FALSE))
650 fprintf(stderr, "test deactivation failed - %s\n",
651 CU_get_error_msg());
652
653 if (amdgpu_set_test_active("Basic Tests",
654 "Sync dependency Test",
655 CU_FALSE))
656 fprintf(stderr, "test deactivation failed - %s\n",
657 CU_get_error_msg());
658 }
659
660 return CU_TRUE;
661 }
662
suite_basic_tests_init(void)663 int suite_basic_tests_init(void)
664 {
665 struct amdgpu_gpu_info gpu_info = {0};
666 int r;
667
668 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
669 &minor_version, &device_handle);
670
671 if (r) {
672 if ((r == -EACCES) && (errno == EACCES))
673 printf("\n\nError:%s. "
674 "Hint:Try to run this test program as root.",
675 strerror(errno));
676 return CUE_SINIT_FAILED;
677 }
678
679 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
680 if (r)
681 return CUE_SINIT_FAILED;
682
683 family_id = gpu_info.family_id;
684
685 return CUE_SUCCESS;
686 }
687
suite_basic_tests_clean(void)688 int suite_basic_tests_clean(void)
689 {
690 int r = amdgpu_device_deinitialize(device_handle);
691
692 if (r == 0)
693 return CUE_SUCCESS;
694 else
695 return CUE_SCLEAN_FAILED;
696 }
697
amdgpu_query_info_test(void)698 static void amdgpu_query_info_test(void)
699 {
700 struct amdgpu_gpu_info gpu_info = {0};
701 uint32_t version, feature;
702 int r;
703
704 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
705 CU_ASSERT_EQUAL(r, 0);
706
707 r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
708 0, &version, &feature);
709 CU_ASSERT_EQUAL(r, 0);
710 }
711
/* Submit one CE IB and one DE IB, each in its own BO, in a single GFX
 * submission, then wait for the fence.  The CE IB bumps the CE counter and
 * the DE IB waits on it, exercising CE/DE synchronization. */
static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* One CPU-mapped GTT BO per IB so the packets can be written below. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		/* SI does not take this packet pair; skip it there. */
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	/* NOTE(review): presumably IT_INCREMENT_CE_COUNTER — confirm opcode. */
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	/* Mark the first IB as a constant-engine IB. */
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	/* Block until the submission's fence signals. */
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

}
802
/* Same CE/DE counter handshake as the separate-IBs test, but with both IBs
 * packed into a single BO: the CE IB at offset 0 and the DE IB at byte
 * offset 16. */
static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Single CPU-mapped GTT BO holding both IBs. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		/* SI does not take this packet pair; skip it there. */
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	/* Mark the first IB as a constant-engine IB. */
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* DE IB (IT_WAIT_ON_CE_COUNTER) at dword 4 / byte 16 of the same BO. */
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	/* Block until the submission's fence signals. */
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
881
/* Exercise CP WRITE_DATA on the GFX ring via the shared helper. */
static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}
886
/* Exercise CP constant fill on the GFX ring via the shared helper. */
static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}
891
/* Exercise CP linear copy on the GFX ring via the shared helper. */
static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}
896
amdgpu_bo_eviction_test(void)897 static void amdgpu_bo_eviction_test(void)
898 {
899 const int sdma_write_length = 1024;
900 const int pm4_dw = 256;
901 amdgpu_context_handle context_handle;
902 amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
903 amdgpu_bo_handle *resources;
904 uint32_t *pm4;
905 struct amdgpu_cs_ib_info *ib_info;
906 struct amdgpu_cs_request *ibs_request;
907 uint64_t bo1_mc, bo2_mc;
908 volatile unsigned char *bo1_cpu, *bo2_cpu;
909 int i, j, r, loop1, loop2;
910 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
911 amdgpu_va_handle bo1_va_handle, bo2_va_handle;
912 struct amdgpu_heap_info vram_info, gtt_info;
913
914 pm4 = calloc(pm4_dw, sizeof(*pm4));
915 CU_ASSERT_NOT_EQUAL(pm4, NULL);
916
917 ib_info = calloc(1, sizeof(*ib_info));
918 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
919
920 ibs_request = calloc(1, sizeof(*ibs_request));
921 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
922
923 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
924 CU_ASSERT_EQUAL(r, 0);
925
926 /* prepare resource */
927 resources = calloc(4, sizeof(amdgpu_bo_handle));
928 CU_ASSERT_NOT_EQUAL(resources, NULL);
929
930 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
931 0, &vram_info);
932 CU_ASSERT_EQUAL(r, 0);
933
934 r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
935 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
936 CU_ASSERT_EQUAL(r, 0);
937 r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
938 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
939 CU_ASSERT_EQUAL(r, 0);
940
941 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
942 0, >t_info);
943 CU_ASSERT_EQUAL(r, 0);
944
945 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
946 AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[0]);
947 CU_ASSERT_EQUAL(r, 0);
948 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
949 AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[1]);
950 CU_ASSERT_EQUAL(r, 0);
951
952
953
954 loop1 = loop2 = 0;
955 /* run 9 circle to test all mapping combination */
956 while(loop1 < 2) {
957 while(loop2 < 2) {
958 /* allocate UC bo1for sDMA use */
959 r = amdgpu_bo_alloc_and_map(device_handle,
960 sdma_write_length, 4096,
961 AMDGPU_GEM_DOMAIN_GTT,
962 gtt_flags[loop1], &bo1,
963 (void**)&bo1_cpu, &bo1_mc,
964 &bo1_va_handle);
965 CU_ASSERT_EQUAL(r, 0);
966
967 /* set bo1 */
968 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
969
970 /* allocate UC bo2 for sDMA use */
971 r = amdgpu_bo_alloc_and_map(device_handle,
972 sdma_write_length, 4096,
973 AMDGPU_GEM_DOMAIN_GTT,
974 gtt_flags[loop2], &bo2,
975 (void**)&bo2_cpu, &bo2_mc,
976 &bo2_va_handle);
977 CU_ASSERT_EQUAL(r, 0);
978
979 /* clear bo2 */
980 memset((void*)bo2_cpu, 0, sdma_write_length);
981
982 resources[0] = bo1;
983 resources[1] = bo2;
984 resources[2] = vram_max[loop2];
985 resources[3] = gtt_max[loop2];
986
987 /* fulfill PM4: test DMA copy linear */
988 i = j = 0;
989 if (family_id == AMDGPU_FAMILY_SI) {
990 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
991 sdma_write_length);
992 pm4[i++] = 0xffffffff & bo2_mc;
993 pm4[i++] = 0xffffffff & bo1_mc;
994 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
995 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
996 } else {
997 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
998 if (family_id >= AMDGPU_FAMILY_AI)
999 pm4[i++] = sdma_write_length - 1;
1000 else
1001 pm4[i++] = sdma_write_length;
1002 pm4[i++] = 0;
1003 pm4[i++] = 0xffffffff & bo1_mc;
1004 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1005 pm4[i++] = 0xffffffff & bo2_mc;
1006 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1007 }
1008
1009 amdgpu_test_exec_cs_helper(context_handle,
1010 AMDGPU_HW_IP_DMA, 0,
1011 i, pm4,
1012 4, resources,
1013 ib_info, ibs_request);
1014
1015 /* verify if SDMA test result meets with expected */
1016 i = 0;
1017 while(i < sdma_write_length) {
1018 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1019 }
1020 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1021 sdma_write_length);
1022 CU_ASSERT_EQUAL(r, 0);
1023 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1024 sdma_write_length);
1025 CU_ASSERT_EQUAL(r, 0);
1026 loop2++;
1027 }
1028 loop2 = 0;
1029 loop1++;
1030 }
1031 amdgpu_bo_free(vram_max[0]);
1032 amdgpu_bo_free(vram_max[1]);
1033 amdgpu_bo_free(gtt_max[0]);
1034 amdgpu_bo_free(gtt_max[1]);
1035 /* clean resources */
1036 free(resources);
1037 free(ibs_request);
1038 free(ib_info);
1039 free(pm4);
1040
1041 /* end of test */
1042 r = amdgpu_cs_ctx_free(context_handle);
1043 CU_ASSERT_EQUAL(r, 0);
1044 }
1045
1046
/* Run the full set of GFX-ring command submission sub-tests in sequence. */
static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}
1060
/*
 * Semaphore test: submit a NOP IB, signal a semaphore behind it, then make a
 * second submission wait on that semaphore before its own NOP IB.  Covers two
 * topologies: (1) same context, different engines (SDMA -> GFX/compute) and
 * (2) same engine, different contexts.  Uses compute in place of GFX on ASICs
 * whose graphics pipe is removed.
 */
static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;
	struct amdgpu_gpu_info gpu_info = {0};
	unsigned gc_ip_type;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	/* fall back to the compute ring when the ASIC has no graphics pipe */
	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;

	/* NOP packet encodings differ between SI and later families */
	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	/* one context + one 4 KiB IB buffer + one BO list per submission */
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	/* signal on SDMA after the first submission ... */
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	/* ... and make the GFX/compute submission wait for it */
	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	/* 500 ms timeout; the fence must have signaled (expired == true) */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = gc_ip_type;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	/* wait in context 1 for the semaphore signaled from context 0 */
	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* tear down BOs, BO lists and contexts */
	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}
1208
/*
 * Submit a single NOP IB on every available compute ring and wait for each
 * fence, verifying basic compute-queue submission works on all rings.
 */
static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* available_rings is a bitmask; iterate while the ring bit is set */
	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		/* one 16-dword NOP packet fills the whole IB */
		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0]=PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1284
/* CP WRITE_DATA test on the compute ring. */
static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}
1289
/* CP constant-fill test on the compute ring. */
static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}
1294
/* CP copy-data test on the compute ring. */
static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}
1299
/* Run the full set of compute-ring command submission sub-tests. */
static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}
1311
1312 /*
1313 * caller need create/release:
1314 * pm4_src, resources, ib_info, and ibs_request
1315 * submit command stream described in ibs_request and wait for this IB accomplished
1316 */
1317 void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,amdgpu_context_handle context_handle,unsigned ip_type,int instance,int pm4_dw,uint32_t * pm4_src,int res_cnt,amdgpu_bo_handle * resources,struct amdgpu_cs_ib_info * ib_info,struct amdgpu_cs_request * ibs_request,bool secure)1318 amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
1319 amdgpu_context_handle context_handle,
1320 unsigned ip_type, int instance, int pm4_dw,
1321 uint32_t *pm4_src, int res_cnt,
1322 amdgpu_bo_handle *resources,
1323 struct amdgpu_cs_ib_info *ib_info,
1324 struct amdgpu_cs_request *ibs_request,
1325 bool secure)
1326 {
1327 int r;
1328 uint32_t expired;
1329 uint32_t *ring_ptr;
1330 amdgpu_bo_handle ib_result_handle;
1331 void *ib_result_cpu;
1332 uint64_t ib_result_mc_address;
1333 struct amdgpu_cs_fence fence_status = {0};
1334 amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
1335 amdgpu_va_handle va_handle;
1336
1337 /* prepare CS */
1338 CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
1339 CU_ASSERT_NOT_EQUAL(resources, NULL);
1340 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1341 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1342 CU_ASSERT_TRUE(pm4_dw <= 1024);
1343
1344 /* allocate IB */
1345 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1346 AMDGPU_GEM_DOMAIN_GTT, 0,
1347 &ib_result_handle, &ib_result_cpu,
1348 &ib_result_mc_address, &va_handle);
1349 CU_ASSERT_EQUAL(r, 0);
1350
1351 /* copy PM4 packet to ring from caller */
1352 ring_ptr = ib_result_cpu;
1353 memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
1354
1355 ib_info->ib_mc_address = ib_result_mc_address;
1356 ib_info->size = pm4_dw;
1357 if (secure)
1358 ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
1359
1360 ibs_request->ip_type = ip_type;
1361 ibs_request->ring = instance;
1362 ibs_request->number_of_ibs = 1;
1363 ibs_request->ibs = ib_info;
1364 ibs_request->fence_info.handle = NULL;
1365
1366 memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
1367 all_res[res_cnt] = ib_result_handle;
1368
1369 r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
1370 NULL, &ibs_request->resources);
1371 CU_ASSERT_EQUAL(r, 0);
1372
1373 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1374
1375 /* submit CS */
1376 r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
1377 CU_ASSERT_EQUAL(r, 0);
1378
1379 r = amdgpu_bo_list_destroy(ibs_request->resources);
1380 CU_ASSERT_EQUAL(r, 0);
1381
1382 fence_status.ip_type = ip_type;
1383 fence_status.ip_instance = 0;
1384 fence_status.ring = ibs_request->ring;
1385 fence_status.context = context_handle;
1386 fence_status.fence = ibs_request->seq_no;
1387
1388 /* wait for IB accomplished */
1389 r = amdgpu_cs_query_fence_status(&fence_status,
1390 AMDGPU_TIMEOUT_INFINITE,
1391 0, &expired);
1392 CU_ASSERT_EQUAL(r, 0);
1393 CU_ASSERT_EQUAL(expired, true);
1394
1395 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1396 ib_result_mc_address, 4096);
1397 CU_ASSERT_EQUAL(r, 0);
1398 }
1399
/* Non-secure convenience wrapper around amdgpu_test_exec_cs_helper_raw()
 * using the file-global device handle. */
static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
			   unsigned ip_type, int instance, int pm4_dw,
			   uint32_t *pm4_src, int res_cnt,
			   amdgpu_bo_handle *resources,
			   struct amdgpu_cs_ib_info *ib_info,
			   struct amdgpu_cs_request *ibs_request)
{
	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
				       ip_type, instance, pm4_dw, pm4_src,
				       res_cnt, resources, ib_info,
				       ibs_request, false);
}
1413
/*
 * Write-linear test: on every available ring of 'ip_type', build a PM4
 * stream (SDMA WRITE or CP WRITE_DATA) that fills a GTT BO with 0xdeadbeaf
 * and verify the result from the CPU.  Each ring is run twice, once per GTT
 * mapping flag (cached / USWC).
 *
 * When 'secure' is set the BOs are created AMDGPU_GEM_CREATE_ENCRYPTED and
 * the result is checked indirectly with TMZ atomic compare-swap packets,
 * since the CPU cannot read encrypted memory directly.
 */
void
amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
							  device, unsigned
							  ip_type, bool secure)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	uint32_t bo_cpu_origin;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	/* secure runs use encrypted (TMZ) BOs for both mapping flags */
	for (i = 0; secure && (i < 2); i++)
		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;

	r = amdgpu_cs_ctx_create(device, &context_handle);

	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* available_rings is a bitmask; iterate while the ring bit is set */
	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fulfill PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR,
							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				/* from AI (gfx9) on, the count field is size-1 */
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper_raw(device, context_handle,
						       ip_type, ring_id, i, pm4,
						       1, resources, ib_info,
						       ibs_request, secure);

			/* verify if SDMA test result meets with expected */
			i = 0;
			if (!secure) {
				while(i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
				}
			} else if (ip_type == AMDGPU_HW_IP_GFX) {
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
				 * command, 1-loop_until_compare_satisfied.
				 * single_pass_atomic, 0-lru
				 * engine_sel, 0-micro_engine
				 */
				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
							ATOMIC_MEM_COMMAND(1) |
							ATOMIC_MEM_CACHEPOLICAY(0) |
							ATOMIC_MEM_ENGINESEL(0));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
			} else if (ip_type == AMDGPU_HW_IP_DMA) {
				/* restore the bo_cpu to compare */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
				 * loop, 1-loop_until_compare_satisfied.
				 * single_pass_atomic, 0-lru
				 */
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
						       0,
						       SDMA_ATOMIC_LOOP(1) |
						       SDMA_ATOMIC_TMZ(1) |
						       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
				/* DMA's atomic behavior is unlike GFX.
				 * If the comparing data is not equal to destination data,
				 * For GFX, loop again till gfx timeout(system hang).
				 * For DMA, loop again till timer expired and then send interrupt.
				 * So testcase can't use interrupt mechanism.
				 * We take another way to verify. When the comparing data is not
				 * equal to destination data, overwrite the source data to the destination
				 * buffer. Otherwise, original destination data unchanged.
				 * So if the bo_cpu data is overwritten, the result is passed.
				 */
				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);

				/* compare again for the case of dest_data != cmp_data */
				i = 0;
				/* restore again, here dest_data should be */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
						       0,
						       SDMA_ATOMIC_LOOP(1) |
						       SDMA_ATOMIC_TMZ(1) |
						       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x87654321;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
				/* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/
				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1613
/* Non-secure write-linear test using the file-global device handle. */
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
								  ip_type,
								  false);
}
1620
/* SDMA write-linear test entry point. */
static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}
1625
/*
 * Constant-fill test: on every available ring of 'ip_type', build a PM4
 * stream (SDMA CONSTANT_FILL or CP DMA_DATA with SRC_SEL=data) that fills a
 * 1 MiB GTT BO with 0xdeadbeaf and verify the contents from the CPU.  Each
 * ring is run twice, once per GTT mapping flag (cached / USWC).
 */
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* available_rings is a bitmask; iterate while the ring bit is set */
	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fulfill PM4: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					/* NOTE(review): SI appears to pack the high address
					 * bits into the upper halfword, hence >> 16 rather
					 * than >> 32 — confirm against the SI SDMA spec. */
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					/* from AI (gfx9) on, the count field is size-1 */
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			i = 0;
			while(i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1753
/* SDMA constant-fill test entry point. */
static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
1758
amdgpu_command_submission_copy_linear_helper(unsigned ip_type)1759 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1760 {
1761 const int sdma_write_length = 1024;
1762 const int pm4_dw = 256;
1763 amdgpu_context_handle context_handle;
1764 amdgpu_bo_handle bo1, bo2;
1765 amdgpu_bo_handle *resources;
1766 uint32_t *pm4;
1767 struct amdgpu_cs_ib_info *ib_info;
1768 struct amdgpu_cs_request *ibs_request;
1769 uint64_t bo1_mc, bo2_mc;
1770 volatile unsigned char *bo1_cpu, *bo2_cpu;
1771 int i, j, r, loop1, loop2, ring_id;
1772 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1773 amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1774 struct drm_amdgpu_info_hw_ip hw_ip_info;
1775
1776 pm4 = calloc(pm4_dw, sizeof(*pm4));
1777 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1778
1779 ib_info = calloc(1, sizeof(*ib_info));
1780 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1781
1782 ibs_request = calloc(1, sizeof(*ibs_request));
1783 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1784
1785 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1786 CU_ASSERT_EQUAL(r, 0);
1787
1788 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1789 CU_ASSERT_EQUAL(r, 0);
1790
1791 /* prepare resource */
1792 resources = calloc(2, sizeof(amdgpu_bo_handle));
1793 CU_ASSERT_NOT_EQUAL(resources, NULL);
1794
1795 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1796 loop1 = loop2 = 0;
1797 /* run 9 circle to test all mapping combination */
1798 while(loop1 < 2) {
1799 while(loop2 < 2) {
1800 /* allocate UC bo1for sDMA use */
1801 r = amdgpu_bo_alloc_and_map(device_handle,
1802 sdma_write_length, 4096,
1803 AMDGPU_GEM_DOMAIN_GTT,
1804 gtt_flags[loop1], &bo1,
1805 (void**)&bo1_cpu, &bo1_mc,
1806 &bo1_va_handle);
1807 CU_ASSERT_EQUAL(r, 0);
1808
1809 /* set bo1 */
1810 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1811
1812 /* allocate UC bo2 for sDMA use */
1813 r = amdgpu_bo_alloc_and_map(device_handle,
1814 sdma_write_length, 4096,
1815 AMDGPU_GEM_DOMAIN_GTT,
1816 gtt_flags[loop2], &bo2,
1817 (void**)&bo2_cpu, &bo2_mc,
1818 &bo2_va_handle);
1819 CU_ASSERT_EQUAL(r, 0);
1820
1821 /* clear bo2 */
1822 memset((void*)bo2_cpu, 0, sdma_write_length);
1823
1824 resources[0] = bo1;
1825 resources[1] = bo2;
1826
1827 /* fulfill PM4: test DMA copy linear */
1828 i = j = 0;
1829 if (ip_type == AMDGPU_HW_IP_DMA) {
1830 if (family_id == AMDGPU_FAMILY_SI) {
1831 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1832 0, 0, 0,
1833 sdma_write_length);
1834 pm4[i++] = 0xffffffff & bo2_mc;
1835 pm4[i++] = 0xffffffff & bo1_mc;
1836 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1837 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1838 } else {
1839 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1840 SDMA_COPY_SUB_OPCODE_LINEAR,
1841 0);
1842 if (family_id >= AMDGPU_FAMILY_AI)
1843 pm4[i++] = sdma_write_length - 1;
1844 else
1845 pm4[i++] = sdma_write_length;
1846 pm4[i++] = 0;
1847 pm4[i++] = 0xffffffff & bo1_mc;
1848 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1849 pm4[i++] = 0xffffffff & bo2_mc;
1850 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1851 }
1852 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1853 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1854 if (family_id == AMDGPU_FAMILY_SI) {
1855 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1856 pm4[i++] = 0xfffffffc & bo1_mc;
1857 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1858 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1859 PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1860 PACKET3_DMA_DATA_SI_CP_SYNC |
1861 (0xffff00000000 & bo1_mc) >> 32;
1862 pm4[i++] = 0xfffffffc & bo2_mc;
1863 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1864 pm4[i++] = sdma_write_length;
1865 } else {
1866 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1867 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1868 PACKET3_DMA_DATA_DST_SEL(0) |
1869 PACKET3_DMA_DATA_SRC_SEL(0) |
1870 PACKET3_DMA_DATA_CP_SYNC;
1871 pm4[i++] = 0xfffffffc & bo1_mc;
1872 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1873 pm4[i++] = 0xfffffffc & bo2_mc;
1874 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1875 pm4[i++] = sdma_write_length;
1876 }
1877 }
1878
1879 amdgpu_test_exec_cs_helper(context_handle,
1880 ip_type, ring_id,
1881 i, pm4,
1882 2, resources,
1883 ib_info, ibs_request);
1884
1885 /* verify if SDMA test result meets with expected */
1886 i = 0;
1887 while(i < sdma_write_length) {
1888 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1889 }
1890 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1891 sdma_write_length);
1892 CU_ASSERT_EQUAL(r, 0);
1893 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1894 sdma_write_length);
1895 CU_ASSERT_EQUAL(r, 0);
1896 loop2++;
1897 }
1898 loop1++;
1899 }
1900 }
1901 /* clean resources */
1902 free(resources);
1903 free(ibs_request);
1904 free(ib_info);
1905 free(pm4);
1906
1907 /* end of test */
1908 r = amdgpu_cs_ctx_free(context_handle);
1909 CU_ASSERT_EQUAL(r, 0);
1910 }
1911
/* Run the linear buffer-copy submission test on the SDMA (DMA) IP. */
static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}
1916
/* Top-level SDMA test: exercises write-linear, const-fill and copy-linear
 * submissions on the DMA IP in sequence. */
static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}
1923
/* Submit two identical GFX command submissions (each with a CE IB and a DE
 * IB), then wait on both fences with amdgpu_cs_wait_fences().
 *
 * @wait_all: passed through to amdgpu_cs_wait_fences(); true waits for all
 *            fences, false returns when the first one signals.
 */
static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* One 4 KiB GTT BO for the DE IB ... */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* ... and one for the CE IB. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* Build the CE IB.  The leading 0xc0008900 packet is skipped on SI
	 * (not supported there); 0xc0008400 is IT_SET_CE_DE_COUNTERS. */
	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* Build the DE IB: wait until the CE counter reaches the value set
	 * above, so CE and DE stay synchronized. */
	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	/* Both submissions share the same pair of IBs and BO list. */
	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);

	CU_ASSERT_EQUAL(r, 0);

	/* ip_instance/ring are zero via the = {0} initializer above. */
	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				AMDGPU_TIMEOUT_INFINITE,
				&expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2017
/* Run the multi-fence test in both wait modes (wait-for-all and
 * wait-for-any). */
static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}
2023
amdgpu_userptr_test(void)2024 static void amdgpu_userptr_test(void)
2025 {
2026 int i, r, j;
2027 uint32_t *pm4 = NULL;
2028 uint64_t bo_mc;
2029 void *ptr = NULL;
2030 int pm4_dw = 256;
2031 int sdma_write_length = 4;
2032 amdgpu_bo_handle handle;
2033 amdgpu_context_handle context_handle;
2034 struct amdgpu_cs_ib_info *ib_info;
2035 struct amdgpu_cs_request *ibs_request;
2036 amdgpu_bo_handle buf_handle;
2037 amdgpu_va_handle va_handle;
2038
2039 pm4 = calloc(pm4_dw, sizeof(*pm4));
2040 CU_ASSERT_NOT_EQUAL(pm4, NULL);
2041
2042 ib_info = calloc(1, sizeof(*ib_info));
2043 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2044
2045 ibs_request = calloc(1, sizeof(*ibs_request));
2046 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2047
2048 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2049 CU_ASSERT_EQUAL(r, 0);
2050
2051 posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2052 CU_ASSERT_NOT_EQUAL(ptr, NULL);
2053 memset(ptr, 0, BUFFER_SIZE);
2054
2055 r = amdgpu_create_bo_from_user_mem(device_handle,
2056 ptr, BUFFER_SIZE, &buf_handle);
2057 CU_ASSERT_EQUAL(r, 0);
2058
2059 r = amdgpu_va_range_alloc(device_handle,
2060 amdgpu_gpu_va_range_general,
2061 BUFFER_SIZE, 1, 0, &bo_mc,
2062 &va_handle, 0);
2063 CU_ASSERT_EQUAL(r, 0);
2064
2065 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2066 CU_ASSERT_EQUAL(r, 0);
2067
2068 handle = buf_handle;
2069
2070 j = i = 0;
2071
2072 if (family_id == AMDGPU_FAMILY_SI)
2073 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2074 sdma_write_length);
2075 else
2076 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2077 SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2078 pm4[i++] = 0xffffffff & bo_mc;
2079 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2080 if (family_id >= AMDGPU_FAMILY_AI)
2081 pm4[i++] = sdma_write_length - 1;
2082 else if (family_id != AMDGPU_FAMILY_SI)
2083 pm4[i++] = sdma_write_length;
2084
2085 while (j++ < sdma_write_length)
2086 pm4[i++] = 0xdeadbeaf;
2087
2088 if (!fork()) {
2089 pm4[0] = 0x0;
2090 exit(0);
2091 }
2092
2093 amdgpu_test_exec_cs_helper(context_handle,
2094 AMDGPU_HW_IP_DMA, 0,
2095 i, pm4,
2096 1, &handle,
2097 ib_info, ibs_request);
2098 i = 0;
2099 while (i < sdma_write_length) {
2100 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2101 }
2102 free(ibs_request);
2103 free(ib_info);
2104 free(pm4);
2105
2106 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2107 CU_ASSERT_EQUAL(r, 0);
2108 r = amdgpu_va_range_free(va_handle);
2109 CU_ASSERT_EQUAL(r, 0);
2110 r = amdgpu_bo_free(buf_handle);
2111 CU_ASSERT_EQUAL(r, 0);
2112 free(ptr);
2113
2114 r = amdgpu_cs_ctx_free(context_handle);
2115 CU_ASSERT_EQUAL(r, 0);
2116
2117 wait(NULL);
2118 }
2119
amdgpu_sync_dependency_test(void)2120 static void amdgpu_sync_dependency_test(void)
2121 {
2122 amdgpu_context_handle context_handle[2];
2123 amdgpu_bo_handle ib_result_handle;
2124 void *ib_result_cpu;
2125 uint64_t ib_result_mc_address;
2126 struct amdgpu_cs_request ibs_request;
2127 struct amdgpu_cs_ib_info ib_info;
2128 struct amdgpu_cs_fence fence_status;
2129 uint32_t expired;
2130 int i, j, r;
2131 amdgpu_bo_list_handle bo_list;
2132 amdgpu_va_handle va_handle;
2133 static uint32_t *ptr;
2134 uint64_t seq_no;
2135
2136 r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2137 CU_ASSERT_EQUAL(r, 0);
2138 r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2139 CU_ASSERT_EQUAL(r, 0);
2140
2141 r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2142 AMDGPU_GEM_DOMAIN_GTT, 0,
2143 &ib_result_handle, &ib_result_cpu,
2144 &ib_result_mc_address, &va_handle);
2145 CU_ASSERT_EQUAL(r, 0);
2146
2147 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2148 &bo_list);
2149 CU_ASSERT_EQUAL(r, 0);
2150
2151 ptr = ib_result_cpu;
2152 i = 0;
2153
2154 memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
2155
2156 /* Dispatch minimal init config and verify it's executed */
2157 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2158 ptr[i++] = 0x80000000;
2159 ptr[i++] = 0x80000000;
2160
2161 ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2162 ptr[i++] = 0x80000000;
2163
2164
2165 /* Program compute regs */
2166 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2167 ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2168 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2169 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
2170
2171
2172 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2173 ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2174 /*
2175 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0
2176 SGPRS = 1
2177 PRIORITY = 0
2178 FLOAT_MODE = 192 (0xc0)
2179 PRIV = 0
2180 DX10_CLAMP = 1
2181 DEBUG_MODE = 0
2182 IEEE_MODE = 0
2183 BULKY = 0
2184 CDBG_USER = 0
2185 *
2186 */
2187 ptr[i++] = 0x002c0040;
2188
2189
2190 /*
2191 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2192 USER_SGPR = 8
2193 TRAP_PRESENT = 0
2194 TGID_X_EN = 0
2195 TGID_Y_EN = 0
2196 TGID_Z_EN = 0
2197 TG_SIZE_EN = 0
2198 TIDIG_COMP_CNT = 0
2199 EXCP_EN_MSB = 0
2200 LDS_SIZE = 0
2201 EXCP_EN = 0
2202 *
2203 */
2204 ptr[i++] = 0x00000010;
2205
2206
2207 /*
2208 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2209 WAVESIZE = 0
2210 *
2211 */
2212 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2213 ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2214 ptr[i++] = 0x00000100;
2215
2216 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2217 ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2218 ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2219 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2220
2221 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2222 ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2223 ptr[i++] = 0;
2224
2225 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2226 ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2227 ptr[i++] = 1;
2228 ptr[i++] = 1;
2229 ptr[i++] = 1;
2230
2231
2232 /* Dispatch */
2233 ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2234 ptr[i++] = 1;
2235 ptr[i++] = 1;
2236 ptr[i++] = 1;
2237 ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2238
2239
2240 while (i & 7)
2241 ptr[i++] = 0xffff1000; /* type3 nop packet */
2242
2243 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2244 ib_info.ib_mc_address = ib_result_mc_address;
2245 ib_info.size = i;
2246
2247 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2248 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2249 ibs_request.ring = 0;
2250 ibs_request.number_of_ibs = 1;
2251 ibs_request.ibs = &ib_info;
2252 ibs_request.resources = bo_list;
2253 ibs_request.fence_info.handle = NULL;
2254
2255 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
2256 CU_ASSERT_EQUAL(r, 0);
2257 seq_no = ibs_request.seq_no;
2258
2259
2260
2261 /* Prepare second command with dependency on the first */
2262 j = i;
2263 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2264 ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2265 ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2266 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2267 ptr[i++] = 99;
2268
2269 while (i & 7)
2270 ptr[i++] = 0xffff1000; /* type3 nop packet */
2271
2272 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2273 ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2274 ib_info.size = i - j;
2275
2276 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2277 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2278 ibs_request.ring = 0;
2279 ibs_request.number_of_ibs = 1;
2280 ibs_request.ibs = &ib_info;
2281 ibs_request.resources = bo_list;
2282 ibs_request.fence_info.handle = NULL;
2283
2284 ibs_request.number_of_dependencies = 1;
2285
2286 ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2287 ibs_request.dependencies[0].context = context_handle[1];
2288 ibs_request.dependencies[0].ip_instance = 0;
2289 ibs_request.dependencies[0].ring = 0;
2290 ibs_request.dependencies[0].fence = seq_no;
2291
2292
2293 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
2294 CU_ASSERT_EQUAL(r, 0);
2295
2296
2297 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2298 fence_status.context = context_handle[0];
2299 fence_status.ip_type = AMDGPU_HW_IP_GFX;
2300 fence_status.ip_instance = 0;
2301 fence_status.ring = 0;
2302 fence_status.fence = ibs_request.seq_no;
2303
2304 r = amdgpu_cs_query_fence_status(&fence_status,
2305 AMDGPU_TIMEOUT_INFINITE,0, &expired);
2306 CU_ASSERT_EQUAL(r, 0);
2307
2308 /* Expect the second command to wait for shader to complete */
2309 CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2310
2311 r = amdgpu_bo_list_destroy(bo_list);
2312 CU_ASSERT_EQUAL(r, 0);
2313
2314 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2315 ib_result_mc_address, 4096);
2316 CU_ASSERT_EQUAL(r, 0);
2317
2318 r = amdgpu_cs_ctx_free(context_handle[0]);
2319 CU_ASSERT_EQUAL(r, 0);
2320 r = amdgpu_cs_ctx_free(context_handle[1]);
2321 CU_ASSERT_EQUAL(r, 0);
2322
2323 free(ibs_request.dependencies);
2324 }
2325
/* Build a deliberately slow-hanging compute shader image in @ptr by
 * repeating the shader body 0x10000 times between its header and footer.
 *
 * @ptr:    destination buffer for the shader code.
 * @family: AMDGPU family id; only AI and RV have slow-hang shaders.
 *
 * Returns 0 on success, -1 for an unsupported family.
 */
static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
{
	struct amdgpu_test_shader *shader;
	int i, loop = 0x10000;

	switch (family) {
	case AMDGPU_FAMILY_AI:
		shader = &memcpy_cs_hang_slow_ai;
		break;
	case AMDGPU_FAMILY_RV:
		shader = &memcpy_cs_hang_slow_rv;
		break;
	default:
		/* No slow-hang shader for this family (dead `break`
		 * after this return removed). */
		return -1;
	}

	/* Header once ... */
	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));

	/* ... body repeated `loop` times ... */
	for (i = 0; i < loop; i++)
		memcpy(ptr + shader->header_length + shader->body_length * i,
			shader->shader + shader->header_length,
			shader->body_length * sizeof(uint32_t));

	/* ... footer once. */
	memcpy(ptr + shader->header_length + shader->body_length * loop,
		shader->shader + shader->header_length + shader->body_length,
		shader->foot_length * sizeof(uint32_t));

	return 0;
}
2356
/* Copy the precompiled gfx9 compute shader selected by @cs_type into @ptr.
 *
 * @ptr:     destination buffer for the shader binary.
 * @cs_type: CS_BUFFERCLEAR, CS_BUFFERCOPY or CS_HANG.
 *
 * Returns 0 on success, -1 for an unknown @cs_type.
 */
static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
					  int cs_type)
{
	uint32_t shader_size;
	const uint32_t *shader;

	switch (cs_type) {
	case CS_BUFFERCLEAR:
		shader = bufferclear_cs_shader_gfx9;
		shader_size = sizeof(bufferclear_cs_shader_gfx9);
		break;
	case CS_BUFFERCOPY:
		shader = buffercopy_cs_shader_gfx9;
		shader_size = sizeof(buffercopy_cs_shader_gfx9);
		break;
	case CS_HANG:
		shader = memcpy_ps_hang;
		shader_size = sizeof(memcpy_ps_hang);
		break;
	default:
		/* Unknown shader type (dead `break` after this return
		 * removed). */
		return -1;
	}

	memcpy(ptr, shader, shader_size);
	return 0;
}
2384
/* Emit the common dispatch-setup packets into @ptr and return the number of
 * dwords written.
 *
 * @ptr:     command buffer to append to.
 * @ip_type: AMDGPU_HW_IP_GFX adds a CONTEXT_CONTROL preamble; compute
 *           queues skip it.
 */
static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
{
	int i = 0;

	/* Write context control and load shadowing register if necessary */
	if (ip_type == AMDGPU_HW_IP_GFX) {
		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
		ptr[i++] = 0x80000000;
		ptr[i++] = 0x80000000;
	}

	/* Issue commands to set default compute state. */
	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
	ptr[i++] = 0x204;
	/* Write the three payload dwords explicitly rather than skipping
	 * them (the original did `i += 3`, silently relying on the caller
	 * having memset the command buffer to zero). */
	ptr[i++] = 0;
	ptr[i++] = 0;
	ptr[i++] = 0;

	/* clear mmCOMPUTE_TMPRING_SIZE */
	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x218;
	ptr[i++] = 0;

	return i;
}
2409
/* Enable every CU on all four shader engines for the next dispatch.
 * Appends the SET_SH_REG packets to @ptr and returns the dword count.
 */
static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
{
	/* mmCOMPUTE_STATIC_THREAD_MGMT_SE0/SE1 start at 0x216,
	 * SE2/SE3 at 0x219; each packet sets a register pair. */
	static const uint32_t se_pair_regs[] = { 0x216, 0x219 };
	size_t pair;
	int n = 0;

	for (pair = 0; pair < sizeof(se_pair_regs) / sizeof(se_pair_regs[0]); pair++) {
		ptr[n++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
		ptr[n++] = se_pair_regs[pair];
		ptr[n++] = 0xffffffff;	/* all CUs enabled */
		ptr[n++] = 0xffffffff;
	}

	return n;
}
2428
/* Program the compute shader address and its SH register set into the
 * command stream at @ptr; returns the number of dwords emitted.
 *
 * @shader_addr: GPU VA of the shader code (256-byte aligned).
 */
static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
{
	int n = 0;
	int reg;

	/* mmCOMPUTE_PGM_LO / mmCOMPUTE_PGM_HI (pair at offset 0x20c):
	 * the address is programmed shifted right by 8. */
	ptr[n++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
	ptr[n++] = 0x20c;
	ptr[n++] = (shader_addr >> 8);
	ptr[n++] = (shader_addr >> 40);

	/* Emit each (register, value) pair from the gfx9 table; offsets in
	 * the table are absolute, so subtract the SH reg base (0x2c00). */
	for (reg = 0; reg < bufferclear_cs_shader_registers_num_gfx9; reg++) {
		ptr[n++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
		ptr[n++] = bufferclear_cs_shader_registers_gfx9[reg][0] - 0x2c00;
		ptr[n++] = bufferclear_cs_shader_registers_gfx9[reg][1];
	}

	return n;
}
2451
/* Dispatch the buffer-clear compute shader on the given IP/ring and verify
 * it filled a 16 KiB VRAM buffer with the constant byte 0x22.
 *
 * @device_handle: device to run on (note: shadows the file-scope global of
 *                 the same name; callers pass that global).
 * @ip_type:       AMDGPU_HW_IP_GFX or AMDGPU_HW_IP_COMPUTE.
 * @ring:          ring index within the IP.
 */
static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
					uint32_t ip_type,
					uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
	volatile unsigned char *ptr_dst;	/* GPU writes it; re-read each access */
	void *ptr_shader;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Command buffer in GTT; must be zeroed because the dispatch-init
	 * helper may leave register payload dwords untouched. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	/* Shader code lives in VRAM. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
	CU_ASSERT_EQUAL(r, 0);

	/* Destination buffer the shader will fill. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/* Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Sets a range of pixel shader constants */
	/* 0x22222222 is the fill pattern the result check below expects. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* Pad the IB to an 8-dword boundary with type-3 NOPs. */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader;
	resources[2] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memset test result meets with expected */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2593
/* Dispatch a compute shader that copies a 16 KiB buffer (or, when @hang is
 * set, a shader known to hang) on the given IP/ring.  On the normal path
 * the copy result is verified byte-by-byte; on the hang path the context
 * must report a GPU reset.
 *
 * @device_handle: device to run on (shadows the file-scope global; callers
 *                 pass that global).
 * @ip_type:       AMDGPU_HW_IP_GFX or AMDGPU_HW_IP_COMPUTE.
 * @ring:          ring index within the IP.
 * @hang:          non-zero selects the CS_HANG shader and checks for a
 *                 reset instead of verifying data.
 */
static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
					uint32_t ip_type,
					uint32_t ring,
					int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;	/* GPU writes it; re-read each access */
	void *ptr_shader;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;	/* also used for the source BO */
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t expired, hang_state, hangs;
	enum cs_type cs_type;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Command buffer in GTT, zeroed (dispatch-init may skip payload
	 * dwords and rely on the zero fill). */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	/* Hang mode swaps in the known-hanging shader. */
	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* Source pattern the copy shader should reproduce in bo_dst. */
	memset(ptr_src, 0x55, bo_dst_size);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/* Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the texture resource constants data to the SGPRs */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_src;
	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* Pad the IB to an 8-dword boundary with type-3 NOPs. */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_shader;
	resources[1] = bo_src;
	resources[2] = bo_dst;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify if memcpy test result meets with expected */
		i = 0;
		while(i < bo_dst_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		/* Hang path: the wait result is ignored; the context must
		 * report that a reset occurred. */
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2756
amdgpu_compute_dispatch_test(void)2757 static void amdgpu_compute_dispatch_test(void)
2758 {
2759 int r;
2760 struct drm_amdgpu_info_hw_ip info;
2761 uint32_t ring_id;
2762
2763 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2764 CU_ASSERT_EQUAL(r, 0);
2765 if (!info.available_rings)
2766 printf("SKIP ... as there's no compute ring\n");
2767
2768 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2769 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2770 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
2771 }
2772 }
2773
amdgpu_gfx_dispatch_test(void)2774 static void amdgpu_gfx_dispatch_test(void)
2775 {
2776 int r;
2777 struct drm_amdgpu_info_hw_ip info;
2778 uint32_t ring_id;
2779
2780 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2781 CU_ASSERT_EQUAL(r, 0);
2782 if (!info.available_rings)
2783 printf("SKIP ... as there's no graphics ring\n");
2784
2785 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2786 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2787 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
2788 }
2789 }
2790
/* For every available ring of @ip_type: run a good memcpy dispatch, then a
 * hanging one, then a good one again, exercising GPU reset and recovery.
 * Non-static: callable from other test files. */
void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
{
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;
	int r;

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no ring for ip %d\n", ip_type);

	/* available_rings is a bitmask; iterate ring indices while the
	 * corresponding bit is set. */
	for (ring_id = 0; (info.available_rings >> ring_id) & 1; ring_id++) {
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
	}
}
2808
amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,uint32_t ip_type,uint32_t ring)2809 static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
2810 uint32_t ip_type, uint32_t ring)
2811 {
2812 amdgpu_context_handle context_handle;
2813 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2814 volatile unsigned char *ptr_dst;
2815 void *ptr_shader;
2816 unsigned char *ptr_src;
2817 uint32_t *ptr_cmd;
2818 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2819 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2820 int i, r;
2821 int bo_dst_size = 0x4000000;
2822 int bo_shader_size = 0x400000;
2823 int bo_cmd_size = 4096;
2824 struct amdgpu_cs_request ibs_request = {0};
2825 struct amdgpu_cs_ib_info ib_info= {0};
2826 uint32_t hang_state, hangs, expired;
2827 struct amdgpu_gpu_info gpu_info = {0};
2828 amdgpu_bo_list_handle bo_list;
2829 struct amdgpu_cs_fence fence_status = {0};
2830
2831 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
2832 CU_ASSERT_EQUAL(r, 0);
2833
2834 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2835 CU_ASSERT_EQUAL(r, 0);
2836
2837 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2838 AMDGPU_GEM_DOMAIN_GTT, 0,
2839 &bo_cmd, (void **)&ptr_cmd,
2840 &mc_address_cmd, &va_cmd);
2841 CU_ASSERT_EQUAL(r, 0);
2842 memset(ptr_cmd, 0, bo_cmd_size);
2843
2844 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2845 AMDGPU_GEM_DOMAIN_VRAM, 0,
2846 &bo_shader, &ptr_shader,
2847 &mc_address_shader, &va_shader);
2848 CU_ASSERT_EQUAL(r, 0);
2849 memset(ptr_shader, 0, bo_shader_size);
2850
2851 r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
2852 CU_ASSERT_EQUAL(r, 0);
2853
2854 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2855 AMDGPU_GEM_DOMAIN_VRAM, 0,
2856 &bo_src, (void **)&ptr_src,
2857 &mc_address_src, &va_src);
2858 CU_ASSERT_EQUAL(r, 0);
2859
2860 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2861 AMDGPU_GEM_DOMAIN_VRAM, 0,
2862 &bo_dst, (void **)&ptr_dst,
2863 &mc_address_dst, &va_dst);
2864 CU_ASSERT_EQUAL(r, 0);
2865
2866 memset(ptr_src, 0x55, bo_dst_size);
2867
2868 i = 0;
2869 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2870
2871 /* Issue commands to set cu mask used in current dispatch */
2872 i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2873
2874 /* Writes shader state to HW */
2875 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2876
2877 /* Write constant data */
2878 /* Writes the texture resource constants data to the SGPRs */
2879 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2880 ptr_cmd[i++] = 0x240;
2881 ptr_cmd[i++] = mc_address_src;
2882 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2883 ptr_cmd[i++] = 0x400000;
2884 ptr_cmd[i++] = 0x74fac;
2885
2886 /* Writes the UAV constant data to the SGPRs. */
2887 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2888 ptr_cmd[i++] = 0x244;
2889 ptr_cmd[i++] = mc_address_dst;
2890 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2891 ptr_cmd[i++] = 0x400000;
2892 ptr_cmd[i++] = 0x74fac;
2893
2894 /* clear mmCOMPUTE_RESOURCE_LIMITS */
2895 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2896 ptr_cmd[i++] = 0x215;
2897 ptr_cmd[i++] = 0;
2898
2899 /* dispatch direct command */
2900 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2901 ptr_cmd[i++] = 0x10000;
2902 ptr_cmd[i++] = 1;
2903 ptr_cmd[i++] = 1;
2904 ptr_cmd[i++] = 1;
2905
2906 while (i & 7)
2907 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2908
2909 resources[0] = bo_shader;
2910 resources[1] = bo_src;
2911 resources[2] = bo_dst;
2912 resources[3] = bo_cmd;
2913 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2914 CU_ASSERT_EQUAL(r, 0);
2915
2916 ib_info.ib_mc_address = mc_address_cmd;
2917 ib_info.size = i;
2918 ibs_request.ip_type = ip_type;
2919 ibs_request.ring = ring;
2920 ibs_request.resources = bo_list;
2921 ibs_request.number_of_ibs = 1;
2922 ibs_request.ibs = &ib_info;
2923 ibs_request.fence_info.handle = NULL;
2924 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2925 CU_ASSERT_EQUAL(r, 0);
2926
2927 fence_status.ip_type = ip_type;
2928 fence_status.ip_instance = 0;
2929 fence_status.ring = ring;
2930 fence_status.context = context_handle;
2931 fence_status.fence = ibs_request.seq_no;
2932
2933 /* wait for IB accomplished */
2934 r = amdgpu_cs_query_fence_status(&fence_status,
2935 AMDGPU_TIMEOUT_INFINITE,
2936 0, &expired);
2937
2938 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2939 CU_ASSERT_EQUAL(r, 0);
2940 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2941
2942 r = amdgpu_bo_list_destroy(bo_list);
2943 CU_ASSERT_EQUAL(r, 0);
2944
2945 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2946 CU_ASSERT_EQUAL(r, 0);
2947 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2948 CU_ASSERT_EQUAL(r, 0);
2949
2950 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2951 CU_ASSERT_EQUAL(r, 0);
2952
2953 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2954 CU_ASSERT_EQUAL(r, 0);
2955
2956 r = amdgpu_cs_ctx_free(context_handle);
2957 CU_ASSERT_EQUAL(r, 0);
2958 }
2959
/*
 * Run the slow-hang dispatch sequence on every available ring of @ip_type:
 * a good memcpy dispatch, the slow-hang dispatch, then another good one to
 * prove the ring recovered.
 *
 * Fix: zero-init @info so available_rings is not read uninitialized when
 * the query fails, and return explicitly after printing the SKIP message.
 */
void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
{
	int r;
	struct drm_amdgpu_info_hw_ip info = {0};
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings) {
		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
		return;
	}

	/* available_rings is a bitmask; iterate rings while the bit is set */
	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
	}
}
2977
amdgpu_draw_load_ps_shader_hang_slow(uint32_t * ptr,int family)2978 static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
2979 {
2980 struct amdgpu_test_shader *shader;
2981 int i, loop = 0x40000;
2982
2983 switch (family) {
2984 case AMDGPU_FAMILY_AI:
2985 case AMDGPU_FAMILY_RV:
2986 shader = &memcpy_ps_hang_slow_ai;
2987 break;
2988 default:
2989 return -1;
2990 break;
2991 }
2992
2993 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2994
2995 for (i = 0; i < loop; i++)
2996 memcpy(ptr + shader->header_length + shader->body_length * i,
2997 shader->shader + shader->header_length,
2998 shader->body_length * sizeof(uint32_t));
2999
3000 memcpy(ptr + shader->header_length + shader->body_length * loop,
3001 shader->shader + shader->header_length + shader->body_length,
3002 shader->foot_length * sizeof(uint32_t));
3003
3004 return 0;
3005 }
3006
amdgpu_draw_load_ps_shader(uint8_t * ptr,int ps_type)3007 static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
3008 {
3009 int i;
3010 uint32_t shader_offset= 256;
3011 uint32_t mem_offset, patch_code_offset;
3012 uint32_t shader_size, patchinfo_code_size;
3013 const uint32_t *shader;
3014 const uint32_t *patchinfo_code;
3015 const uint32_t *patchcode_offset;
3016
3017 switch (ps_type) {
3018 case PS_CONST:
3019 shader = ps_const_shader_gfx9;
3020 shader_size = sizeof(ps_const_shader_gfx9);
3021 patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
3022 patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
3023 patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
3024 break;
3025 case PS_TEX:
3026 shader = ps_tex_shader_gfx9;
3027 shader_size = sizeof(ps_tex_shader_gfx9);
3028 patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
3029 patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
3030 patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
3031 break;
3032 case PS_HANG:
3033 shader = memcpy_ps_hang;
3034 shader_size = sizeof(memcpy_ps_hang);
3035
3036 memcpy(ptr, shader, shader_size);
3037 return 0;
3038 default:
3039 return -1;
3040 break;
3041 }
3042
3043 /* write main shader program */
3044 for (i = 0 ; i < 10; i++) {
3045 mem_offset = i * shader_offset;
3046 memcpy(ptr + mem_offset, shader, shader_size);
3047 }
3048
3049 /* overwrite patch codes */
3050 for (i = 0 ; i < 10; i++) {
3051 mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
3052 patch_code_offset = i * patchinfo_code_size;
3053 memcpy(ptr + mem_offset,
3054 patchinfo_code + patch_code_offset,
3055 patchinfo_code_size * sizeof(uint32_t));
3056 }
3057
3058 return 0;
3059 }
3060
3061 /* load RectPosTexFast_VS */
amdgpu_draw_load_vs_shader(uint8_t * ptr)3062 static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
3063 {
3064 const uint32_t *shader;
3065 uint32_t shader_size;
3066
3067 shader = vs_RectPosTexFast_shader_gfx9;
3068 shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
3069
3070 memcpy(ptr, shader, shader_size);
3071
3072 return 0;
3073 }
3074
amdgpu_draw_init(uint32_t * ptr)3075 static int amdgpu_draw_init(uint32_t *ptr)
3076 {
3077 int i = 0;
3078 const uint32_t *preamblecache_ptr;
3079 uint32_t preamblecache_size;
3080
3081 /* Write context control and load shadowing register if necessary */
3082 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
3083 ptr[i++] = 0x80000000;
3084 ptr[i++] = 0x80000000;
3085
3086 preamblecache_ptr = preamblecache_gfx9;
3087 preamblecache_size = sizeof(preamblecache_gfx9);
3088
3089 memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
3090 return i + preamblecache_size/sizeof(uint32_t);
3091 }
3092
/*
 * Emit the PM4 state binding the draw destination surface: CB_COLOR0_*
 * registers, the MRT pitch, cleared extra color bases, the shader export
 * format, and a zeroed depth buffer.
 *
 * ptr:       command-stream write pointer (dword granularity)
 * dst_addr:  GPU VA of the render target (256-byte aligned; written >> 8)
 * hang_slow: non-zero selects the oversized surface used by the slow-hang
 *            tests (larger extent and pitch values)
 *
 * Returns the number of dwords written.
 */
static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
							 uint64_t dst_addr,
							 int hang_slow)
{
	int i = 0;

	/* setup color buffer */
	/* offset reg
	   0xA318   CB_COLOR0_BASE
	   0xA319   CB_COLOR0_BASE_EXT
	   0xA31A   CB_COLOR0_ATTRIB2
	   0xA31B   CB_COLOR0_VIEW
	   0xA31C   CB_COLOR0_INFO
	   0xA31D   CB_COLOR0_ATTRIB
	   0xA31E   CB_COLOR0_DCC_CONTROL
	   0xA31F   CB_COLOR0_CMASK
	   0xA320   CB_COLOR0_CMASK_BASE_EXT
	   0xA321   CB_COLOR0_FMASK
	   0xA322   CB_COLOR0_FMASK_BASE_EXT
	   0xA323   CB_COLOR0_CLEAR_WORD0
	   0xA324   CB_COLOR0_CLEAR_WORD1
	   0xA325   CB_COLOR0_DCC_BASE
	   0xA326   CB_COLOR0_DCC_BASE_EXT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
	ptr[i++] = 0x318;
	ptr[i++] = dst_addr >> 8;	/* BASE: VA in 256-byte units */
	ptr[i++] = dst_addr >> 40;	/* BASE_EXT: high VA bits */
	ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;	/* ATTRIB2: surface extent */
	ptr[i++] = 0;
	ptr[i++] = 0x50438;
	ptr[i++] = 0x10140000;
	/* remaining 9 CB_COLOR0_* values are left zero (callers pre-clear the IB) */
	i += 9;

	/* mmCB_MRT0_EPITCH */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1e8;
	ptr[i++] = hang_slow ? 0x7ff : 0x1f;	/* pitch matches the surface extent above */

	/* 0xA32B   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x32b;
	ptr[i++] = 0;

	/* 0xA33A   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x33a;
	ptr[i++] = 0;

	/* SPI_SHADER_COL_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c5;
	ptr[i++] = 9;

	/* Setup depth buffer */
	/* mmDB_Z_INFO */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0xe;
	/* both depth registers left zero: no depth buffer bound */
	i += 2;

	return i;
}
3154
/*
 * Emit the remaining fixed-function draw state: tile steering override,
 * AA sample locations, centroid priority, and the prebuilt gfx9 cached
 * command block (rasterizer/viewport state).
 *
 * hang_slow: non-zero patches the cached command block's scissor/extent
 *            dword for the oversized slow-hang surface.
 *
 * Returns the number of dwords written.
 */
static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
{
	int i = 0;
	const uint32_t *cached_cmd_ptr;
	uint32_t cached_cmd_size;

	/* mmPA_SC_TILE_STEERING_OVERRIDE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 0;

	/* raw dwords: NOP + an event packet between the two override writes */
	ptr[i++] = 0xffff1000;
	ptr[i++] = 0xc0021000;

	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 1;

	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
	ptr[i++] = 0x2fe;
	/* 16 sample-location values left zero (IB is pre-cleared by callers) */
	i += 16;

	/* mmPA_SC_CENTROID_PRIORITY_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x2f5;
	i += 2;

	cached_cmd_ptr = cached_cmd_gfx9;
	cached_cmd_size = sizeof(cached_cmd_gfx9);

	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
	if (hang_slow)
		/* enlarge the surface dimensions dword for the slow-hang case */
		*(ptr + i + 12) = 0x8000800;
	i += cached_cmd_size/sizeof(uint32_t);

	return i;
}
3193
/*
 * Program the vertex-shader stage for the RectPosTexFast shader: VS output
 * controls, shader program address/resources, and the rectangle's position
 * (and, for PS_TEX, texcoord) constants in the VS user SGPRs.
 *
 * ptr:         command-stream write pointer
 * ps_type:     PS_CONST or PS_TEX; selects which VS constants are written
 * shader_addr: GPU VA of the VS binary (256-byte aligned; written >> 8)
 * hang_slow:   non-zero scales the rect coordinates up for the huge surface
 *
 * Returns the number of dwords written.
 */
static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
						  int ps_type,
						  uint64_t shader_addr,
						  int hang_slow)
{
	int i = 0;

	/* mmPA_CL_VS_OUT_CNTL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x207;
	ptr[i++] = 0;

	/* mmSPI_SHADER_PGM_RSRC3_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x46;
	ptr[i++] = 0xffff;	/* enable all CUs for the VS */

	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x48;
	ptr[i++] = shader_addr >> 8;
	ptr[i++] = shader_addr >> 40;

	/* mmSPI_SHADER_PGM_RSRC1_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4a;
	ptr[i++] = 0xc0081;
	/* mmSPI_SHADER_PGM_RSRC2_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4b;
	ptr[i++] = 0x18;

	/* mmSPI_VS_OUT_CONFIG */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1b1;
	ptr[i++] = 2;

	/* mmSPI_SHADER_POS_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c3;
	ptr[i++] = 4;

	/* user SGPRs 0x4c..0x4f: rect extent; first two stay zero,
	 * the float extent is 32.0 normally or 2048.0 for hang_slow */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x4c;
	i += 2;
	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;

	/* user SGPRs 0x50..0x53: texcoords — 1.0f,1.0f only for PS_TEX */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x50;
	i += 2;
	if (ps_type == PS_CONST) {
		i += 2;
	} else if (ps_type == PS_TEX) {
		ptr[i++] = 0x3f800000;
		ptr[i++] = 0x3f800000;
	}

	/* user SGPRs 0x54..0x57: left zero */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x54;
	i += 4;

	return i;
}
3258
amdgpu_draw_ps_write2hw(uint32_t * ptr,int ps_type,uint64_t shader_addr)3259 static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
3260 int ps_type,
3261 uint64_t shader_addr)
3262 {
3263 int i, j;
3264 const uint32_t *sh_registers;
3265 const uint32_t *context_registers;
3266 uint32_t num_sh_reg, num_context_reg;
3267
3268 if (ps_type == PS_CONST) {
3269 sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
3270 context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
3271 num_sh_reg = ps_num_sh_registers_gfx9;
3272 num_context_reg = ps_num_context_registers_gfx9;
3273 } else if (ps_type == PS_TEX) {
3274 sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
3275 context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
3276 num_sh_reg = ps_num_sh_registers_gfx9;
3277 num_context_reg = ps_num_context_registers_gfx9;
3278 }
3279
3280 i = 0;
3281
3282 /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS
3283 0x2c08 SPI_SHADER_PGM_LO_PS
3284 0x2c09 SPI_SHADER_PGM_HI_PS */
3285 shader_addr += 256 * 9;
3286 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
3287 ptr[i++] = 0x7;
3288 ptr[i++] = 0xffff;
3289 ptr[i++] = shader_addr >> 8;
3290 ptr[i++] = shader_addr >> 40;
3291
3292 for (j = 0; j < num_sh_reg; j++) {
3293 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3294 ptr[i++] = sh_registers[j * 2] - 0x2c00;
3295 ptr[i++] = sh_registers[j * 2 + 1];
3296 }
3297
3298 for (j = 0; j < num_context_reg; j++) {
3299 if (context_registers[j * 2] != 0xA1C5) {
3300 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3301 ptr[i++] = context_registers[j * 2] - 0xa000;
3302 ptr[i++] = context_registers[j * 2 + 1];
3303 }
3304
3305 if (context_registers[j * 2] == 0xA1B4) {
3306 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3307 ptr[i++] = 0x1b3;
3308 ptr[i++] = 2;
3309 }
3310 }
3311
3312 return i;
3313 }
3314
amdgpu_draw_draw(uint32_t * ptr)3315 static int amdgpu_draw_draw(uint32_t *ptr)
3316 {
3317 int i = 0;
3318
3319 /* mmIA_MULTI_VGT_PARAM */
3320 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3321 ptr[i++] = 0x40000258;
3322 ptr[i++] = 0xd00ff;
3323
3324 /* mmVGT_PRIMITIVE_TYPE */
3325 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3326 ptr[i++] = 0x10000242;
3327 ptr[i++] = 0x11;
3328
3329 ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
3330 ptr[i++] = 3;
3331 ptr[i++] = 2;
3332
3333 return i;
3334 }
3335
amdgpu_memset_draw(amdgpu_device_handle device_handle,amdgpu_bo_handle bo_shader_ps,amdgpu_bo_handle bo_shader_vs,uint64_t mc_address_shader_ps,uint64_t mc_address_shader_vs,uint32_t ring_id)3336 void amdgpu_memset_draw(amdgpu_device_handle device_handle,
3337 amdgpu_bo_handle bo_shader_ps,
3338 amdgpu_bo_handle bo_shader_vs,
3339 uint64_t mc_address_shader_ps,
3340 uint64_t mc_address_shader_vs,
3341 uint32_t ring_id)
3342 {
3343 amdgpu_context_handle context_handle;
3344 amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
3345 volatile unsigned char *ptr_dst;
3346 uint32_t *ptr_cmd;
3347 uint64_t mc_address_dst, mc_address_cmd;
3348 amdgpu_va_handle va_dst, va_cmd;
3349 int i, r;
3350 int bo_dst_size = 16384;
3351 int bo_cmd_size = 4096;
3352 struct amdgpu_cs_request ibs_request = {0};
3353 struct amdgpu_cs_ib_info ib_info = {0};
3354 struct amdgpu_cs_fence fence_status = {0};
3355 uint32_t expired;
3356 amdgpu_bo_list_handle bo_list;
3357
3358 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3359 CU_ASSERT_EQUAL(r, 0);
3360
3361 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3362 AMDGPU_GEM_DOMAIN_GTT, 0,
3363 &bo_cmd, (void **)&ptr_cmd,
3364 &mc_address_cmd, &va_cmd);
3365 CU_ASSERT_EQUAL(r, 0);
3366 memset(ptr_cmd, 0, bo_cmd_size);
3367
3368 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3369 AMDGPU_GEM_DOMAIN_VRAM, 0,
3370 &bo_dst, (void **)&ptr_dst,
3371 &mc_address_dst, &va_dst);
3372 CU_ASSERT_EQUAL(r, 0);
3373
3374 i = 0;
3375 i += amdgpu_draw_init(ptr_cmd + i);
3376
3377 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3378
3379 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3380
3381 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);
3382
3383 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
3384
3385 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3386 ptr_cmd[i++] = 0xc;
3387 ptr_cmd[i++] = 0x33333333;
3388 ptr_cmd[i++] = 0x33333333;
3389 ptr_cmd[i++] = 0x33333333;
3390 ptr_cmd[i++] = 0x33333333;
3391
3392 i += amdgpu_draw_draw(ptr_cmd + i);
3393
3394 while (i & 7)
3395 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3396
3397 resources[0] = bo_dst;
3398 resources[1] = bo_shader_ps;
3399 resources[2] = bo_shader_vs;
3400 resources[3] = bo_cmd;
3401 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
3402 CU_ASSERT_EQUAL(r, 0);
3403
3404 ib_info.ib_mc_address = mc_address_cmd;
3405 ib_info.size = i;
3406 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3407 ibs_request.ring = ring_id;
3408 ibs_request.resources = bo_list;
3409 ibs_request.number_of_ibs = 1;
3410 ibs_request.ibs = &ib_info;
3411 ibs_request.fence_info.handle = NULL;
3412
3413 /* submit CS */
3414 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3415 CU_ASSERT_EQUAL(r, 0);
3416
3417 r = amdgpu_bo_list_destroy(bo_list);
3418 CU_ASSERT_EQUAL(r, 0);
3419
3420 fence_status.ip_type = AMDGPU_HW_IP_GFX;
3421 fence_status.ip_instance = 0;
3422 fence_status.ring = ring_id;
3423 fence_status.context = context_handle;
3424 fence_status.fence = ibs_request.seq_no;
3425
3426 /* wait for IB accomplished */
3427 r = amdgpu_cs_query_fence_status(&fence_status,
3428 AMDGPU_TIMEOUT_INFINITE,
3429 0, &expired);
3430 CU_ASSERT_EQUAL(r, 0);
3431 CU_ASSERT_EQUAL(expired, true);
3432
3433 /* verify if memset test result meets with expected */
3434 i = 0;
3435 while(i < bo_dst_size) {
3436 CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
3437 }
3438
3439 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
3440 CU_ASSERT_EQUAL(r, 0);
3441
3442 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3443 CU_ASSERT_EQUAL(r, 0);
3444
3445 r = amdgpu_cs_ctx_free(context_handle);
3446 CU_ASSERT_EQUAL(r, 0);
3447 }
3448
/*
 * Driver for the memset-via-draw test: allocate VRAM for the PS and VS
 * binaries, load them, run amdgpu_memset_draw() on @ring, then free the
 * shader buffers.
 */
static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
				    uint32_t ring)
{
	const int shader_bo_size = 4096;
	amdgpu_bo_handle ps_bo, vs_bo;
	void *ps_cpu_ptr, *vs_cpu_ptr;
	uint64_t ps_gpu_addr, vs_gpu_addr;
	amdgpu_va_handle ps_va, vs_va;
	int ret;

	/* pixel shader buffer */
	ret = amdgpu_bo_alloc_and_map(device_handle, shader_bo_size, 4096,
				      AMDGPU_GEM_DOMAIN_VRAM, 0,
				      &ps_bo, &ps_cpu_ptr,
				      &ps_gpu_addr, &ps_va);
	CU_ASSERT_EQUAL(ret, 0);
	memset(ps_cpu_ptr, 0, shader_bo_size);

	/* vertex shader buffer */
	ret = amdgpu_bo_alloc_and_map(device_handle, shader_bo_size, 4096,
				      AMDGPU_GEM_DOMAIN_VRAM, 0,
				      &vs_bo, &vs_cpu_ptr,
				      &vs_gpu_addr, &vs_va);
	CU_ASSERT_EQUAL(ret, 0);
	memset(vs_cpu_ptr, 0, shader_bo_size);

	ret = amdgpu_draw_load_ps_shader(ps_cpu_ptr, PS_CONST);
	CU_ASSERT_EQUAL(ret, 0);

	ret = amdgpu_draw_load_vs_shader(vs_cpu_ptr);
	CU_ASSERT_EQUAL(ret, 0);

	amdgpu_memset_draw(device_handle, ps_bo, vs_bo,
			   ps_gpu_addr, vs_gpu_addr, ring);

	ret = amdgpu_bo_unmap_and_free(ps_bo, ps_va, ps_gpu_addr, shader_bo_size);
	CU_ASSERT_EQUAL(ret, 0);

	ret = amdgpu_bo_unmap_and_free(vs_bo, vs_va, vs_gpu_addr, shader_bo_size);
	CU_ASSERT_EQUAL(ret, 0);
}
3489
/*
 * Memcpy-via-draw test body: draws a rectangle with the PS_TEX pixel
 * shader sampling from a 16 KiB source buffer (filled with 0x55) into an
 * equally sized destination, then compares the two on the CPU.
 *
 * When @hang is non-zero the caller has loaded the hanging PS instead;
 * the result is not compared and the context is expected to report
 * AMDGPU_CTX_UNKNOWN_RESET.
 */
static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
			       amdgpu_bo_handle bo_shader_ps,
			       amdgpu_bo_handle bo_shader_vs,
			       uint64_t mc_address_shader_ps,
			       uint64_t mc_address_shader_vs,
			       uint32_t ring, int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	volatile unsigned char *ptr_dst;	/* volatile: written by the GPU */
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	int i, r;
	int bo_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t hang_state, hangs;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer in GTT; cleared so skipped packet dwords read as 0 */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* fill the copy source with a recognizable pattern */
	memset(ptr_src, 0x55, bo_size);

	/* build the IB: preamble, surface/state setup, VS+PS programming */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	/* PS user SGPRs 0xc..0x13: texture resource descriptor pointing at
	 * the source buffer (base VA, size, format words) */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x7c01f;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0x3e000;
	i += 3;

	/* PS user SGPRs 0x14..0x17: sampler descriptor */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	/* context reg 0x191: PS input control (presumably
	 * SPI_PS_INPUT_CNTL_0 — verify against gfx9 register headers) */
	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad IB to 8-dword alignment */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify if memcpy test result meets with expected */
		i = 0;
		while(i < bo_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		/* hang shader: the context must report a reset instead */
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
3631
/*
 * Driver for the memcpy-via-draw test: allocate VRAM for the PS and VS
 * binaries, load PS_TEX (or the hanging PS when @hang is set) plus the
 * vertex shader, run amdgpu_memcpy_draw() on @ring, and free the buffers.
 */
void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
			     int hang)
{
	const int shader_bo_size = 4096;
	enum ps_type ps_kind = hang ? PS_HANG : PS_TEX;
	amdgpu_bo_handle ps_bo, vs_bo;
	void *ps_cpu_ptr, *vs_cpu_ptr;
	uint64_t ps_gpu_addr, vs_gpu_addr;
	amdgpu_va_handle ps_va, vs_va;
	int ret;

	/* pixel shader buffer */
	ret = amdgpu_bo_alloc_and_map(device_handle, shader_bo_size, 4096,
				      AMDGPU_GEM_DOMAIN_VRAM, 0,
				      &ps_bo, &ps_cpu_ptr,
				      &ps_gpu_addr, &ps_va);
	CU_ASSERT_EQUAL(ret, 0);
	memset(ps_cpu_ptr, 0, shader_bo_size);

	/* vertex shader buffer */
	ret = amdgpu_bo_alloc_and_map(device_handle, shader_bo_size, 4096,
				      AMDGPU_GEM_DOMAIN_VRAM, 0,
				      &vs_bo, &vs_cpu_ptr,
				      &vs_gpu_addr, &vs_va);
	CU_ASSERT_EQUAL(ret, 0);
	memset(vs_cpu_ptr, 0, shader_bo_size);

	ret = amdgpu_draw_load_ps_shader(ps_cpu_ptr, ps_kind);
	CU_ASSERT_EQUAL(ret, 0);

	ret = amdgpu_draw_load_vs_shader(vs_cpu_ptr);
	CU_ASSERT_EQUAL(ret, 0);

	amdgpu_memcpy_draw(device_handle, ps_bo, vs_bo,
			   ps_gpu_addr, vs_gpu_addr, ring, hang);

	ret = amdgpu_bo_unmap_and_free(ps_bo, ps_va, ps_gpu_addr, shader_bo_size);
	CU_ASSERT_EQUAL(ret, 0);

	ret = amdgpu_bo_unmap_and_free(vs_bo, vs_va, vs_gpu_addr, shader_bo_size);
	CU_ASSERT_EQUAL(ret, 0);
}
3673
amdgpu_draw_test(void)3674 static void amdgpu_draw_test(void)
3675 {
3676 int r;
3677 struct drm_amdgpu_info_hw_ip info;
3678 uint32_t ring_id;
3679
3680 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
3681 CU_ASSERT_EQUAL(r, 0);
3682 if (!info.available_rings)
3683 printf("SKIP ... as there's no graphics ring\n");
3684
3685 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3686 amdgpu_memset_draw_test(device_handle, ring_id);
3687 amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
3688 }
3689 }
3690
/*
 * Slow-hang draw test: same memcpy-via-draw sequence as amdgpu_memcpy_draw
 * but with a 64 MiB surface and an artificially huge pixel shader
 * (amdgpu_draw_load_ps_shader_hang_slow), so the job outlives the GPU
 * timeout and triggers a reset. The fence result is intentionally ignored;
 * the check is that the context reports AMDGPU_CTX_UNKNOWN_RESET.
 */
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;	/* volatile: written by the GPU */
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	int bo_size = 0x4000000;		/* 64 MiB src/dst surfaces */
	int bo_shader_ps_size = 0x400000;	/* room for the expanded hang PS */
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t hang_state, hangs, expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	/* family_id selects the slow-hang shader variant */
	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer in GTT; cleared so skipped packet dwords read as 0 */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	/* build the IB; hang_slow=1 selects the oversized surface state */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
							mc_address_shader_vs, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	/* PS user SGPRs 0xc..0x13: texture resource descriptor for the
	 * source buffer, with the enlarged extent/size words */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x1ffc7ff;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0xffe000;
	i += 3;

	/* PS user SGPRs 0x14..0x17: sampler descriptor */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	/* context reg 0x191: PS input control (presumably
	 * SPI_PS_INPUT_CNTL_0 — verify against gfx9 register headers) */
	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad IB to 8-dword alignment */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished; result deliberately unchecked —
	 * the job is expected to hang and be reset */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
3852
amdgpu_gpu_reset_test(void)3853 static void amdgpu_gpu_reset_test(void)
3854 {
3855 int r;
3856 char debugfs_path[256], tmp[10];
3857 int fd;
3858 struct stat sbuf;
3859 amdgpu_context_handle context_handle;
3860 uint32_t hang_state, hangs;
3861
3862 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3863 CU_ASSERT_EQUAL(r, 0);
3864
3865 r = fstat(drm_amdgpu[0], &sbuf);
3866 CU_ASSERT_EQUAL(r, 0);
3867
3868 sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
3869 fd = open(debugfs_path, O_RDONLY);
3870 CU_ASSERT(fd >= 0);
3871
3872 r = read(fd, tmp, sizeof(tmp)/sizeof(char));
3873 CU_ASSERT(r > 0);
3874
3875 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3876 CU_ASSERT_EQUAL(r, 0);
3877 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3878
3879 close(fd);
3880 r = amdgpu_cs_ctx_free(context_handle);
3881 CU_ASSERT_EQUAL(r, 0);
3882
3883 amdgpu_compute_dispatch_test();
3884 amdgpu_gfx_dispatch_test();
3885 }
3886
amdgpu_stable_pstate_test(void)3887 static void amdgpu_stable_pstate_test(void)
3888 {
3889 int r;
3890 amdgpu_context_handle context_handle;
3891 uint32_t current_pstate = 0, new_pstate = 0;
3892
3893 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3894 CU_ASSERT_EQUAL(r, 0);
3895
3896 r = amdgpu_cs_ctx_stable_pstate(context_handle,
3897 AMDGPU_CTX_OP_GET_STABLE_PSTATE,
3898 0, ¤t_pstate);
3899 CU_ASSERT_EQUAL(r, 0);
3900 CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE);
3901
3902 r = amdgpu_cs_ctx_stable_pstate(context_handle,
3903 AMDGPU_CTX_OP_SET_STABLE_PSTATE,
3904 AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
3905 CU_ASSERT_EQUAL(r, 0);
3906
3907 r = amdgpu_cs_ctx_stable_pstate(context_handle,
3908 AMDGPU_CTX_OP_GET_STABLE_PSTATE,
3909 0, &new_pstate);
3910 CU_ASSERT_EQUAL(r, 0);
3911 CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK);
3912
3913 r = amdgpu_cs_ctx_free(context_handle);
3914 CU_ASSERT_EQUAL(r, 0);
3915 }
3916