1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <unistd.h>
27 #include <sys/types.h>
28 #ifdef MAJOR_IN_SYSMACROS
29 #include <sys/sysmacros.h>
30 #endif
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #ifdef HAVE_ALLOCA_H
34 # include <alloca.h>
35 #endif
36 #include <sys/wait.h>
37
38 #include "CUnit/Basic.h"
39
40 #include "amdgpu_test.h"
41 #include "amdgpu_drm.h"
42 #include "util_math.h"
43
44 static amdgpu_device_handle device_handle;
45 static uint32_t major_version;
46 static uint32_t minor_version;
47 static uint32_t family_id;
48
49 static void amdgpu_query_info_test(void);
50 static void amdgpu_command_submission_gfx(void);
51 static void amdgpu_command_submission_compute(void);
52 static void amdgpu_command_submission_multi_fence(void);
53 static void amdgpu_command_submission_sdma(void);
54 static void amdgpu_userptr_test(void);
55 static void amdgpu_semaphore_test(void);
56 static void amdgpu_sync_dependency_test(void);
57 static void amdgpu_bo_eviction_test(void);
58 static void amdgpu_compute_dispatch_test(void);
59 static void amdgpu_gfx_dispatch_test(void);
60 static void amdgpu_draw_test(void);
61 static void amdgpu_gpu_reset_test(void);
62
63 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
64 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
65 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
66 static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
67 unsigned ip_type,
68 int instance, int pm4_dw, uint32_t *pm4_src,
69 int res_cnt, amdgpu_bo_handle *resources,
70 struct amdgpu_cs_ib_info *ib_info,
71 struct amdgpu_cs_request *ibs_request);
72
73 CU_TestInfo basic_tests[] = {
74 { "Query Info Test", amdgpu_query_info_test },
75 { "Userptr Test", amdgpu_userptr_test },
76 { "bo eviction Test", amdgpu_bo_eviction_test },
77 { "Command submission Test (GFX)", amdgpu_command_submission_gfx },
78 { "Command submission Test (Compute)", amdgpu_command_submission_compute },
79 { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
80 { "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
81 { "SW semaphore Test", amdgpu_semaphore_test },
82 { "Sync dependency Test", amdgpu_sync_dependency_test },
83 { "Dispatch Test (Compute)", amdgpu_compute_dispatch_test },
84 { "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test },
85 { "Draw Test", amdgpu_draw_test },
86 { "GPU reset Test", amdgpu_gpu_reset_test },
87 CU_TEST_INFO_NULL,
88 };
/* Default buffer size used by the submission helpers (8 KiB). */
#define BUFFER_SIZE (8 * 1024)

/* SDMA packet header field: the opcode occupies the low byte of dword 0. */
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)

#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)  ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */

/* Build an SDMA dword-0 header: extra/flags [31:16], sub-op [15:8], op [7:0]. */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))

#define	SDMA_OPCODE_WRITE  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR  0
	/* NOTE(review): "WRTIE" is a historical typo; the name is kept
	 * unchanged so existing users of this identifier keep compiling. */
#       define SDMA_WRTIE_SUB_OPCODE_TILED  1

#define	SDMA_OPCODE_COPY  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR  0

/* NOP encodings differ between the GFX/compute CP and the SDMA engine. */
#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
111
/* PM4 packet helpers.  A PM4 header encodes the packet type in bits
 * [31:30], the dword count in bits [29:16] and, for type-3 packets, the
 * opcode in bits [15:8]. */
#define PACKET_TYPE0	0
#define PACKET_TYPE1	1
#define PACKET_TYPE2	2
#define PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
/* Fix: shift an unsigned value into the top bits - left-shifting a signed
 * int into the sign bit (PACKET_TYPE3 << 30) is undefined behaviour in C.
 * The produced bit patterns are unchanged. */
#define PACKET0(reg, n)	(((unsigned)PACKET_TYPE0 << 30) |		\
			 ((reg) & 0xFFFF) |				\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |		\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
/* Fix: parenthesize the whole expansion - the original expanded to
 * `PACKET3(op, n) | (1 << 1)` with no outer parentheses, so composing it
 * with a neighbouring operator (e.g. `PACKET3_COMPUTE(op, n) & mask`)
 * silently produced the wrong value. */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))

/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */
159
/* CP DMA_DATA packet: header, CONTROL, SRC lo/hi, DST lo/hi,
 * COMMAND | BYTE_COUNT. */
#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)	((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x)	((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE	(1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)	((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x)	((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE	(1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)	((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
/* Fix: use an unsigned constant - (1 << 31) left-shifts a signed int into
 * the sign bit, which is undefined behaviour in C; 1u << 31 yields the
 * same bit pattern. */
#              define PACKET3_DMA_DATA_CP_SYNC		(1u << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC		(1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x)	((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x)	((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS		(1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS		(1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC		(1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC		(1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT	(1 << 30)
223
/* SI-generation SDMA packet header: op [31:28], flag bits, 20-bit count.
 * Fix: the opcode field is shifted through unsigned arithmetic -
 * SDMA_PACKET_SI(SDMA_NOP_SI, ...) shifts 0xF into bit 31, which is
 * undefined behaviour for a signed int.  Bit patterns are unchanged. */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	(((unsigned)((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)	((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)	((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)	((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
/* Fix: 1u << 31 - shifting signed 1 into the sign bit is undefined
 * behaviour; the unsigned form produces the same bit pattern. */
#              define PACKET3_DMA_DATA_SI_CP_SYNC	(1u << 31)
250
251
/* PKT3 opcodes and context-control field builders used by the draw and
 * dispatch tests. */
#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START	0x00002c00

#define	PACKET3_DISPATCH_DIRECT			0x15
#define PACKET3_EVENT_WRITE			0x46
#define PACKET3_ACQUIRE_MEM			0x58
#define PACKET3_SET_CONTEXT_REG			0x69
#define PACKET3_SET_UCONFIG_REG			0x79
#define PACKET3_DRAW_INDEX_AUTO			0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO			0x2e0c
#define mmCOMPUTE_PGM_RSRC1			0x2e12
#define mmCOMPUTE_TMPRING_SIZE			0x2e18
#define mmCOMPUTE_USER_DATA_0			0x2e40
#define mmCOMPUTE_USER_DATA_1			0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS		0x2e15
#define mmCOMPUTE_NUM_THREAD_X			0x2e07

/* Byte-swap a 32-bit word.
 * Fix: (1) parenthesize every use of the argument so expression arguments
 * (e.g. SWAP_32(a | b)) expand correctly - the original used bare `num`;
 * (2) unsigned mask constants so `(num & 0xff) << 24` cannot left-shift a
 * signed value into the sign bit (undefined behaviour).  Results are
 * unchanged for all existing uses. */
#define SWAP_32(num) ((((num) & 0xff000000u) >> 24) | \
		      (((num) & 0x0000ff00u) << 8) | \
		      (((num) & 0x00ff0000u) >> 8) | \
		      (((num) & 0x000000ffu) << 24))
283
284
285 /* Shader code
286 * void main()
287 {
288
289 float x = some_input;
290 for (unsigned i = 0; i < 1000000; i++)
291 x = sin(x);
292
293 u[0] = 42u;
294 }
295 */
296
/* Machine code for the shader sketched in the comment above, each word
 * wrapped in SWAP_32().
 * NOTE(review): presumably the swap puts the words in the byte order the
 * GPU expects - confirm against the original assembly before editing. */
static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

/* Byte offsets within a test buffer object; used by helpers outside this
 * chunk (shader code at CODE_OFFSET, written data at DATA_OFFSET). */
#define CODE_OFFSET 512
#define DATA_OFFSET 1024

/* Selects which compute shader a dispatch helper should run. */
enum cs_type {
	CS_BUFFERCLEAR,		/* fill a buffer with a constant */
	CS_BUFFERCOPY		/* copy one buffer into another */
};
311
/* GFX9 compute shader that clears a buffer (per its name; raw machine
 * code, not assembled here). */
static const uint32_t bufferclear_cs_shader_gfx9[] = {
	0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
	0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
	0xBF810000
};

/* {register offset, value} pairs programmed alongside the bufferclear
 * shader; the symbolic register names are in the trailing comments. */
static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 },
	{0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 },
	{0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

/* Entry count of the table above - keep in sync when adding registers. */
static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

/* GFX9 compute shader that copies one buffer to another (per its name). */
static const uint32_t buffercopy_cs_shader_gfx9[] = {
	0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
	0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
};
332
/* Canned GFX9 state-initialization command stream (pre-encoded PM4
 * SET_*_REG packets) emitted ahead of the draw/dispatch tests.  Opaque
 * data - do not edit individual words without the register spec at hand. */
static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};
355
/* Selects which pixel-shader variant a draw test uses. */
enum ps_type {
	PS_CONST,	/* constant-colour shader */
	PS_TEX		/* texture-sampling shader */
};

/* GFX9 "constant colour" pixel shader (raw machine code). */
static const uint32_t ps_const_shader_gfx9[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
	0xC4001C0F, 0x00000100, 0xBF810000
};

/* Dwords per row of the patch-info table below. */
static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

/* Alternative tail sequences patched over the shader above.
 * NOTE(review): which of the 10 rows gets selected happens in code outside
 * this chunk - verify before relying on row order. */
static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
	 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
	}
};

/* Dword offset (into ps_const_shader_gfx9) where the patch is applied. */
static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
	0x00000004
};

/* Entry count of the SH register table below. */
static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
	{0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

/* Entry count of the context register table below. */
static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 },
	{0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 },
	{0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F },
	{0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 },
	{0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 },
	{0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
};
405
/* GFX9 texture-sampling pixel shader (raw machine code). */
static const uint32_t ps_tex_shader_gfx9[] = {
	0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
	0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
	0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
	0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
	0x00000100, 0xBF810000
};

/* Dword offset (into ps_tex_shader_gfx9) where the patch is applied. */
static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
	0x0000000B
};

/* Dwords per row of the patch-info table below. */
static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

/* Alternative tail sequences patched over the texture shader above; same
 * layout as ps_const_shader_patchinfo_code_gfx9. */
static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
	 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
	}
};

/* SH registers for the texture shader ({offset, value} pairs). */
static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
	{0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

/* Context registers for the texture shader ({offset, value} pairs). */
static const uint32_t ps_tex_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 },
	{0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 },
	{0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F },
	{0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 },
	{0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 },
	{0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
};
448
/* GFX9 vertex shader used by the draw tests (rect position + texcoord
 * fast path, per its name; raw machine code). */
static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
	0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
	0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
	0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
	0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
	0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
	0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
	0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
	0xC400020F, 0x05060403, 0xBF810000
};

/* Canned GFX9 per-draw context-register command stream (pre-encoded PM4).
 * Opaque data - keep words unchanged. */
static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x1a,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};
470
/**
 * Allocate a buffer object, give it a GPU virtual address mapping with the
 * requested flags, and map it for CPU access.
 *
 * On success, fills *bo, *cpu, *mc_address and *va_handle; the caller owns
 * all four.  On failure, every intermediate resource is released and the
 * (negative) error code of the failing libdrm call is returned.
 */
int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
				unsigned alignment, unsigned heap, uint64_t alloc_flags,
				uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
				uint64_t *mc_address,
				amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	/* The VA mapping must cover whole pages, hence the ALIGN. */
	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				AMDGPU_VM_PAGE_READABLE |
				AMDGPU_VM_PAGE_WRITEABLE |
				AMDGPU_VM_PAGE_EXECUTABLE |
				mapping_flags,
				AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

error_cpu_map:
	/* Fix: the original called amdgpu_bo_cpu_unmap() here even though
	 * the CPU mapping never succeeded; only the GPU VA mapping has to
	 * be undone on this path. */
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_map:
	/* Fix: release the VA range handle, which was previously leaked on
	 * every error path taken after amdgpu_va_range_alloc() succeeded. */
	amdgpu_va_range_free(handle);

error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
528
529
530
suite_basic_tests_init(void)531 int suite_basic_tests_init(void)
532 {
533 struct amdgpu_gpu_info gpu_info = {0};
534 int r;
535
536 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
537 &minor_version, &device_handle);
538
539 if (r) {
540 if ((r == -EACCES) && (errno == EACCES))
541 printf("\n\nError:%s. "
542 "Hint:Try to run this test program as root.",
543 strerror(errno));
544 return CUE_SINIT_FAILED;
545 }
546
547 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
548 if (r)
549 return CUE_SINIT_FAILED;
550
551 family_id = gpu_info.family_id;
552
553 return CUE_SUCCESS;
554 }
555
suite_basic_tests_clean(void)556 int suite_basic_tests_clean(void)
557 {
558 int r = amdgpu_device_deinitialize(device_handle);
559
560 if (r == 0)
561 return CUE_SUCCESS;
562 else
563 return CUE_SCLEAN_FAILED;
564 }
565
amdgpu_query_info_test(void)566 static void amdgpu_query_info_test(void)
567 {
568 struct amdgpu_gpu_info gpu_info = {0};
569 uint32_t version, feature;
570 int r;
571
572 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
573 CU_ASSERT_EQUAL(r, 0);
574
575 r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
576 0, &version, &feature);
577 CU_ASSERT_EQUAL(r, 0);
578 }
579
/* Submit one CE IB and one DE IB, each in its own GTT buffer object, as a
 * single two-IB GFX submission, then wait for the resulting fence. */
static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Buffer holding the DE IB. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Separate buffer holding the CE IB. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* CE IB: IT_SET_CE_DE_COUNTERS (not emitted on SI) followed by a
	 * packet advancing the CE counter. */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;	/* first IB runs on the CE */

	/* DE IB: IT_WAIT_ON_CE_COUNTER keeps the DE behind the CE. */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	/* Block until the submission retires. */
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

}
670
/* Same CE+DE submission as the separate-IB test, but both IBs live in ONE
 * buffer object: the DE IB starts 16 bytes (4 dwords) into it. */
static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* CE IB at the start of the buffer: IT_SET_CE_DE_COUNTERS (not
	 * emitted on SI) followed by a CE counter bump. */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* DE IB (IT_WAIT_ON_CE_COUNTER) at dword 4 of the same buffer.
	 * NOTE(review): on SI the CE IB is only 2 dwords, so dwords 2-3
	 * stay unused; the +16 byte offset is a fixed slot, not i*4. */
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	/* Block until the submission retires. */
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
749
/* Exercise CP WRITE_DATA via the write-linear helper on the GFX ring. */
static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}
754
/* Exercise CP constant fill via the const-fill helper on the GFX ring. */
static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}
759
/* Exercise CP linear copy via the copy-linear helper on the GFX ring. */
static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}
764
/* Eviction stress test: pin two maximum-size allocations in VRAM and two
 * in GTT so memory is scarce, then run an SDMA copy between two small GTT
 * buffers for each combination of GTT mapping flags, forcing the kernel to
 * evict/move buffers.  The copy result is verified byte by byte. */
static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	/* GTT mapping attributes cycled through for source/destination. */
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* Grab the two largest possible VRAM allocations so the small test
	 * buffers below have to compete for space. */
	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	/* Same for the GTT heap. */
	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);



	loop1 = loop2 = 0;
	/* Run all 2x2 combinations of source/destination GTT flags (the
	 * original "9 circle" wording was stale). */
	while(loop1 < 2) {
		while(loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* fill bo1 with the pattern the copy must reproduce */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fulfill PM4: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				/* SI layout: header, dst lo, src lo, dst hi,
				 * src hi (note dst comes before src). */
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				/* From AI (gfx9) on, the count field holds
				 * "length - 1" instead of the length. */
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
913
914
/* Top-level GFX submission test: runs each self-contained CP sub-test on
 * the GFX ring in sequence. */
static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}
928
/*
 * Exercise kernel semaphores in two configurations:
 *   1. signal on SDMA / wait on GFX within one context, and
 *   2. signal and wait on GFX across two different contexts.
 * Each half submits a single NOP IB before the signal and another after
 * the wait, then confirms the post-wait fence expires.
 */
static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	/* SI uses different NOP packet encodings for SDMA and GFX/compute. */
	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	/* Create two contexts, each with its own mapped 4K IB and BO list. */
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	/* Signal after the SDMA NOP; the GFX submission below must wait. */
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	/* Wait (500 ms timeout) for the GFX submission that waited on sem. */
	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	/* Signal from context 0, wait from context 1 on the same engine. */
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* Tear down per-context resources, then the semaphore itself. */
	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}
1068
amdgpu_command_submission_compute_nop(void)1069 static void amdgpu_command_submission_compute_nop(void)
1070 {
1071 amdgpu_context_handle context_handle;
1072 amdgpu_bo_handle ib_result_handle;
1073 void *ib_result_cpu;
1074 uint64_t ib_result_mc_address;
1075 struct amdgpu_cs_request ibs_request;
1076 struct amdgpu_cs_ib_info ib_info;
1077 struct amdgpu_cs_fence fence_status;
1078 uint32_t *ptr;
1079 uint32_t expired;
1080 int r, instance;
1081 amdgpu_bo_list_handle bo_list;
1082 amdgpu_va_handle va_handle;
1083 struct drm_amdgpu_info_hw_ip info;
1084
1085 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1086 CU_ASSERT_EQUAL(r, 0);
1087
1088 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1089 CU_ASSERT_EQUAL(r, 0);
1090
1091 for (instance = 0; (1 << instance) & info.available_rings; instance++) {
1092 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1093 AMDGPU_GEM_DOMAIN_GTT, 0,
1094 &ib_result_handle, &ib_result_cpu,
1095 &ib_result_mc_address, &va_handle);
1096 CU_ASSERT_EQUAL(r, 0);
1097
1098 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1099 &bo_list);
1100 CU_ASSERT_EQUAL(r, 0);
1101
1102 ptr = ib_result_cpu;
1103 memset(ptr, 0, 16);
1104 ptr[0]=PACKET3(PACKET3_NOP, 14);
1105
1106 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1107 ib_info.ib_mc_address = ib_result_mc_address;
1108 ib_info.size = 16;
1109
1110 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1111 ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
1112 ibs_request.ring = instance;
1113 ibs_request.number_of_ibs = 1;
1114 ibs_request.ibs = &ib_info;
1115 ibs_request.resources = bo_list;
1116 ibs_request.fence_info.handle = NULL;
1117
1118 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
1119 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
1120 CU_ASSERT_EQUAL(r, 0);
1121
1122 fence_status.context = context_handle;
1123 fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
1124 fence_status.ip_instance = 0;
1125 fence_status.ring = instance;
1126 fence_status.fence = ibs_request.seq_no;
1127
1128 r = amdgpu_cs_query_fence_status(&fence_status,
1129 AMDGPU_TIMEOUT_INFINITE,
1130 0, &expired);
1131 CU_ASSERT_EQUAL(r, 0);
1132
1133 r = amdgpu_bo_list_destroy(bo_list);
1134 CU_ASSERT_EQUAL(r, 0);
1135
1136 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1137 ib_result_mc_address, 4096);
1138 CU_ASSERT_EQUAL(r, 0);
1139 }
1140
1141 r = amdgpu_cs_ctx_free(context_handle);
1142 CU_ASSERT_EQUAL(r, 0);
1143 }
1144
/* CP WRITE_DATA test on the compute ring (shared helper). */
static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}
1149
/* CP constant-fill test on the compute ring (shared helper). */
static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}
1154
/* CP copy-linear test on the compute ring (shared helper). */
static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}
1159
/* Top-level compute command-submission test: runs all compute sub-tests. */
static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}
1171
1172 /*
1173 * caller need create/release:
1174 * pm4_src, resources, ib_info, and ibs_request
1175 * submit command stream described in ibs_request and wait for this IB accomplished
1176 */
/*
 * Copy the caller's PM4 stream into a freshly allocated 4K IB, submit it
 * on the given IP/ring with the caller's resources plus the IB itself in
 * the BO list, then block until the fence signals.  The caller owns
 * pm4_src, resources, ib_info, and ibs_request (see comment above).
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	/* res_cnt caller BOs + 1 slot for the IB BO appended below. */
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	/* 1024 dwords = the 4K IB allocated below. */
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy PM4 packet to ring from caller */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	/* List can be destroyed right after submit; the kernel holds its
	 * own references for the duration of the CS. */
	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}
1254
/*
 * Write-linear test: build a DMA WRITE (or CP WRITE_DATA) packet that
 * fills a GTT buffer with 0xdeadbeaf, submit it on every available ring
 * of ip_type, and verify the pattern landed.  Runs twice per ring: once
 * with a cached GTT mapping and once with USWC (gtt_flags[loop]).
 */
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fulfill PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				/* 64-bit destination address, low then high dword */
				pm4[i++] = 0xffffffff & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				/* Count field: AI+ uses (count - 1); SI encodes
				 * the count in the packet header above. */
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1362
/* SDMA write-linear test (shared helper on the DMA ring). */
static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}
1367
/*
 * Constant-fill test: build a DMA CONSTANT_FILL (or CP DMA_DATA) packet
 * that fills a 1 MiB GTT buffer with 0xdeadbeaf, submit it on every
 * available ring of ip_type, and verify the pattern.  Runs twice per
 * ring: cached GTT mapping, then USWC (gtt_flags[loop]).
 */
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fulfill PM4: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					/* SI encodes the dword count in the
					 * packet header. */
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					/* NOTE(review): high address bits
					 * shifted by 16, not 32 — looks like
					 * the SI packet's addr-hi field
					 * placement; confirm against the SI
					 * sDMA packet spec. */
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					/* AI+ byte count field is (size - 1). */
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			i = 0;
			while(i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1495
/* SDMA constant-fill test (shared helper on the DMA ring). */
static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
1500
amdgpu_command_submission_copy_linear_helper(unsigned ip_type)1501 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1502 {
1503 const int sdma_write_length = 1024;
1504 const int pm4_dw = 256;
1505 amdgpu_context_handle context_handle;
1506 amdgpu_bo_handle bo1, bo2;
1507 amdgpu_bo_handle *resources;
1508 uint32_t *pm4;
1509 struct amdgpu_cs_ib_info *ib_info;
1510 struct amdgpu_cs_request *ibs_request;
1511 uint64_t bo1_mc, bo2_mc;
1512 volatile unsigned char *bo1_cpu, *bo2_cpu;
1513 int i, j, r, loop1, loop2, ring_id;
1514 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1515 amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1516 struct drm_amdgpu_info_hw_ip hw_ip_info;
1517
1518 pm4 = calloc(pm4_dw, sizeof(*pm4));
1519 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1520
1521 ib_info = calloc(1, sizeof(*ib_info));
1522 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1523
1524 ibs_request = calloc(1, sizeof(*ibs_request));
1525 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1526
1527 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1528 CU_ASSERT_EQUAL(r, 0);
1529
1530 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1531 CU_ASSERT_EQUAL(r, 0);
1532
1533 /* prepare resource */
1534 resources = calloc(2, sizeof(amdgpu_bo_handle));
1535 CU_ASSERT_NOT_EQUAL(resources, NULL);
1536
1537 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1538 loop1 = loop2 = 0;
1539 /* run 9 circle to test all mapping combination */
1540 while(loop1 < 2) {
1541 while(loop2 < 2) {
1542 /* allocate UC bo1for sDMA use */
1543 r = amdgpu_bo_alloc_and_map(device_handle,
1544 sdma_write_length, 4096,
1545 AMDGPU_GEM_DOMAIN_GTT,
1546 gtt_flags[loop1], &bo1,
1547 (void**)&bo1_cpu, &bo1_mc,
1548 &bo1_va_handle);
1549 CU_ASSERT_EQUAL(r, 0);
1550
1551 /* set bo1 */
1552 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1553
1554 /* allocate UC bo2 for sDMA use */
1555 r = amdgpu_bo_alloc_and_map(device_handle,
1556 sdma_write_length, 4096,
1557 AMDGPU_GEM_DOMAIN_GTT,
1558 gtt_flags[loop2], &bo2,
1559 (void**)&bo2_cpu, &bo2_mc,
1560 &bo2_va_handle);
1561 CU_ASSERT_EQUAL(r, 0);
1562
1563 /* clear bo2 */
1564 memset((void*)bo2_cpu, 0, sdma_write_length);
1565
1566 resources[0] = bo1;
1567 resources[1] = bo2;
1568
1569 /* fulfill PM4: test DMA copy linear */
1570 i = j = 0;
1571 if (ip_type == AMDGPU_HW_IP_DMA) {
1572 if (family_id == AMDGPU_FAMILY_SI) {
1573 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1574 0, 0, 0,
1575 sdma_write_length);
1576 pm4[i++] = 0xffffffff & bo2_mc;
1577 pm4[i++] = 0xffffffff & bo1_mc;
1578 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1579 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1580 } else {
1581 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1582 SDMA_COPY_SUB_OPCODE_LINEAR,
1583 0);
1584 if (family_id >= AMDGPU_FAMILY_AI)
1585 pm4[i++] = sdma_write_length - 1;
1586 else
1587 pm4[i++] = sdma_write_length;
1588 pm4[i++] = 0;
1589 pm4[i++] = 0xffffffff & bo1_mc;
1590 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1591 pm4[i++] = 0xffffffff & bo2_mc;
1592 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1593 }
1594 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1595 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1596 if (family_id == AMDGPU_FAMILY_SI) {
1597 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1598 pm4[i++] = 0xfffffffc & bo1_mc;
1599 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1600 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1601 PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1602 PACKET3_DMA_DATA_SI_CP_SYNC |
1603 (0xffff00000000 & bo1_mc) >> 32;
1604 pm4[i++] = 0xfffffffc & bo2_mc;
1605 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1606 pm4[i++] = sdma_write_length;
1607 } else {
1608 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1609 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1610 PACKET3_DMA_DATA_DST_SEL(0) |
1611 PACKET3_DMA_DATA_SRC_SEL(0) |
1612 PACKET3_DMA_DATA_CP_SYNC;
1613 pm4[i++] = 0xfffffffc & bo1_mc;
1614 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1615 pm4[i++] = 0xfffffffc & bo2_mc;
1616 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1617 pm4[i++] = sdma_write_length;
1618 }
1619 }
1620
1621 amdgpu_test_exec_cs_helper(context_handle,
1622 ip_type, ring_id,
1623 i, pm4,
1624 2, resources,
1625 ib_info, ibs_request);
1626
1627 /* verify if SDMA test result meets with expected */
1628 i = 0;
1629 while(i < sdma_write_length) {
1630 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1631 }
1632 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1633 sdma_write_length);
1634 CU_ASSERT_EQUAL(r, 0);
1635 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1636 sdma_write_length);
1637 CU_ASSERT_EQUAL(r, 0);
1638 loop2++;
1639 }
1640 loop1++;
1641 }
1642 }
1643 /* clean resources */
1644 free(resources);
1645 free(ibs_request);
1646 free(ib_info);
1647 free(pm4);
1648
1649 /* end of test */
1650 r = amdgpu_cs_ctx_free(context_handle);
1651 CU_ASSERT_EQUAL(r, 0);
1652 }
1653
/* SDMA copy-linear test (shared helper on the DMA ring). */
static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}
1658
/* Top-level SDMA command-submission test: runs all SDMA sub-tests. */
static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}
1665
/*
 * Submit the same two-IB (CE + DE) request twice on GFX and wait on both
 * resulting fences with amdgpu_cs_wait_fences(), in either wait-all or
 * wait-any mode depending on the wait_all flag.
 */
static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* DE (draw engine) IB buffer */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* CE (constant engine) IB buffer */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	/* The 0xc0008900 prefix packet is not emitted on SI. */
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	/* Both requests reference the same CE+DE IB pair. */
	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);

	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

	/* The call under test: wait on multiple fences at once. */
	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				  AMDGPU_TIMEOUT_INFINITE,
				  &expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1759
/* Multi-fence test: exercise amdgpu_cs_wait_fences in both wait modes. */
static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}
1765
amdgpu_userptr_test(void)1766 static void amdgpu_userptr_test(void)
1767 {
1768 int i, r, j;
1769 uint32_t *pm4 = NULL;
1770 uint64_t bo_mc;
1771 void *ptr = NULL;
1772 int pm4_dw = 256;
1773 int sdma_write_length = 4;
1774 amdgpu_bo_handle handle;
1775 amdgpu_context_handle context_handle;
1776 struct amdgpu_cs_ib_info *ib_info;
1777 struct amdgpu_cs_request *ibs_request;
1778 amdgpu_bo_handle buf_handle;
1779 amdgpu_va_handle va_handle;
1780
1781 pm4 = calloc(pm4_dw, sizeof(*pm4));
1782 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1783
1784 ib_info = calloc(1, sizeof(*ib_info));
1785 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1786
1787 ibs_request = calloc(1, sizeof(*ibs_request));
1788 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1789
1790 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1791 CU_ASSERT_EQUAL(r, 0);
1792
1793 posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
1794 CU_ASSERT_NOT_EQUAL(ptr, NULL);
1795 memset(ptr, 0, BUFFER_SIZE);
1796
1797 r = amdgpu_create_bo_from_user_mem(device_handle,
1798 ptr, BUFFER_SIZE, &buf_handle);
1799 CU_ASSERT_EQUAL(r, 0);
1800
1801 r = amdgpu_va_range_alloc(device_handle,
1802 amdgpu_gpu_va_range_general,
1803 BUFFER_SIZE, 1, 0, &bo_mc,
1804 &va_handle, 0);
1805 CU_ASSERT_EQUAL(r, 0);
1806
1807 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
1808 CU_ASSERT_EQUAL(r, 0);
1809
1810 handle = buf_handle;
1811
1812 j = i = 0;
1813
1814 if (family_id == AMDGPU_FAMILY_SI)
1815 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1816 sdma_write_length);
1817 else
1818 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1819 SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
1820 pm4[i++] = 0xffffffff & bo_mc;
1821 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1822 if (family_id >= AMDGPU_FAMILY_AI)
1823 pm4[i++] = sdma_write_length - 1;
1824 else if (family_id != AMDGPU_FAMILY_SI)
1825 pm4[i++] = sdma_write_length;
1826
1827 while (j++ < sdma_write_length)
1828 pm4[i++] = 0xdeadbeaf;
1829
1830 if (!fork()) {
1831 pm4[0] = 0x0;
1832 exit(0);
1833 }
1834
1835 amdgpu_test_exec_cs_helper(context_handle,
1836 AMDGPU_HW_IP_DMA, 0,
1837 i, pm4,
1838 1, &handle,
1839 ib_info, ibs_request);
1840 i = 0;
1841 while (i < sdma_write_length) {
1842 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
1843 }
1844 free(ibs_request);
1845 free(ib_info);
1846 free(pm4);
1847
1848 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
1849 CU_ASSERT_EQUAL(r, 0);
1850 r = amdgpu_va_range_free(va_handle);
1851 CU_ASSERT_EQUAL(r, 0);
1852 r = amdgpu_bo_free(buf_handle);
1853 CU_ASSERT_EQUAL(r, 0);
1854 free(ptr);
1855
1856 r = amdgpu_cs_ctx_free(context_handle);
1857 CU_ASSERT_EQUAL(r, 0);
1858
1859 wait(NULL);
1860 }
1861
/*
 * Inter-context dependency test: submit a compute shader dispatch on
 * context[1], then submit a second IB on context[0] whose request lists the
 * first submission's fence as an explicit dependency.  The second IB writes
 * the value 99 into the shared data word; if the kernel honours the
 * dependency, that write can only land after the shader has finished, so
 * ptr[DATA_OFFSET] must read back as 99 at the end.
 */
static void amdgpu_sync_dependency_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t expired;
	int i, j, r;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	/* NOTE(review): 'static' here is unusual for a scratch pointer that is
	 * reassigned on every call — presumably harmless, but verify intent. */
	static uint32_t *ptr;
	uint64_t seq_no;

	/* Two contexts: one runs the shader, the other the dependent write. */
	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	/* One 8 KiB GTT BO holds both IBs, the shader binary (at CODE_OFFSET
	 * dwords) and the data word (at DATA_OFFSET dwords). */
	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
			AMDGPU_GEM_DOMAIN_GTT, 0,
						    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ptr = ib_result_cpu;
	i = 0;

	/* Upload the shader binary; CODE_OFFSET is in dwords (uint32_t *). */
	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));

	/* Dispatch minimal init config and verify it's executed */
	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
	ptr[i++] = 0x80000000;
	ptr[i++] = 0x80000000;

	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
	ptr[i++] = 0x80000000;


	/* Program compute regs */
	/* Shader code GPU address is 256-byte aligned, hence >> 8 / >> 40. */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;


	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
	/*
	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
	                                      SGPRS = 1
	                                      PRIORITY = 0
	                                      FLOAT_MODE = 192 (0xc0)
	                                      PRIV = 0
	                                      DX10_CLAMP = 1
	                                      DEBUG_MODE = 0
	                                      IEEE_MODE = 0
	                                      BULKY = 0
	                                      CDBG_USER = 0
	 *
	 */
	ptr[i++] = 0x002c0040;


	/*
	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
	                                      USER_SGPR = 8
	                                      TRAP_PRESENT = 0
	                                      TGID_X_EN = 0
	                                      TGID_Y_EN = 0
	                                      TGID_Z_EN = 0
	                                      TG_SIZE_EN = 0
	                                      TIDIG_COMP_CNT = 0
	                                      EXCP_EN_MSB = 0
	                                      LDS_SIZE = 0
	                                      EXCP_EN = 0
	 *
	 */
	ptr[i++] = 0x00000010;


	/*
	 * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
	                                         WAVESIZE = 0
	 *
	 */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0x00000100;

	/* USER_DATA_0/1: pass the data word's GPU address to the shader. */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0;

	/* 1x1x1 thread group. */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;


	/* Dispatch */
	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */


	/* Pad the IB to an 8-dword boundary with type-3 NOPs. */
	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address;
	ib_info.size = i;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	/* First submission (the shader) goes to context[1]. */
	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);
	/* Remember its fence sequence number to use as the dependency. */
	seq_no = ibs_request.seq_no;



	/* Prepare second command with dependency on the first */
	/* The second IB starts right after the first one (dword j). */
	j = i;
	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
	ptr[i++] = 99;

	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
	ib_info.size = i - j;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	ibs_request.number_of_dependencies = 1;

	/* NOTE(review): calloc result is not checked before dereference. */
	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
	ibs_request.dependencies[0].context = context_handle[1];
	ibs_request.dependencies[0].ip_instance = 0;
	ibs_request.dependencies[0].ring = 0;
	ibs_request.dependencies[0].fence = seq_no;


	/* Second submission (dependent write) goes to context[0]. */
	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);


	/* Wait for the dependent submission to complete. */
	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	/* Expect the second command to wait for shader to complete */
	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_free(context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	free(ibs_request.dependencies);
}
2067
amdgpu_dispatch_load_cs_shader(uint8_t * ptr,int cs_type)2068 static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2069 int cs_type)
2070 {
2071 uint32_t shader_size;
2072 const uint32_t *shader;
2073
2074 switch (cs_type) {
2075 case CS_BUFFERCLEAR:
2076 shader = bufferclear_cs_shader_gfx9;
2077 shader_size = sizeof(bufferclear_cs_shader_gfx9);
2078 break;
2079 case CS_BUFFERCOPY:
2080 shader = buffercopy_cs_shader_gfx9;
2081 shader_size = sizeof(buffercopy_cs_shader_gfx9);
2082 break;
2083 default:
2084 return -1;
2085 break;
2086 }
2087
2088 memcpy(ptr, shader, shader_size);
2089 return 0;
2090 }
2091
amdgpu_dispatch_init(uint32_t * ptr,uint32_t ip_type)2092 static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
2093 {
2094 int i = 0;
2095
2096 /* Write context control and load shadowing register if necessary */
2097 if (ip_type == AMDGPU_HW_IP_GFX) {
2098 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2099 ptr[i++] = 0x80000000;
2100 ptr[i++] = 0x80000000;
2101 }
2102
2103 /* Issue commands to set default compute state. */
2104 /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2105 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2106 ptr[i++] = 0x204;
2107 i += 3;
2108
2109 /* clear mmCOMPUTE_TMPRING_SIZE */
2110 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2111 ptr[i++] = 0x218;
2112 ptr[i++] = 0;
2113
2114 return i;
2115 }
2116
amdgpu_dispatch_write_cumask(uint32_t * ptr)2117 static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
2118 {
2119 int i = 0;
2120
2121 /* Issue commands to set cu mask used in current dispatch */
2122 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2123 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2124 ptr[i++] = 0x216;
2125 ptr[i++] = 0xffffffff;
2126 ptr[i++] = 0xffffffff;
2127 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2128 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2129 ptr[i++] = 0x219;
2130 ptr[i++] = 0xffffffff;
2131 ptr[i++] = 0xffffffff;
2132
2133 return i;
2134 }
2135
amdgpu_dispatch_write2hw(uint32_t * ptr,uint64_t shader_addr)2136 static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
2137 {
2138 int i, j;
2139
2140 i = 0;
2141
2142 /* Writes shader state to HW */
2143 /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2144 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2145 ptr[i++] = 0x20c;
2146 ptr[i++] = (shader_addr >> 8);
2147 ptr[i++] = (shader_addr >> 40);
2148 /* write sh regs*/
2149 for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2150 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2151 /* - Gfx9ShRegBase */
2152 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2153 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2154 }
2155
2156 return i;
2157 }
2158
/*
 * Submit a buffer-clear compute dispatch on @ip_type/@ring and verify the
 * result: every byte of the 16 KiB VRAM destination must read back as 0x22
 * (low byte of the 0x22222222 pattern loaded into the shader's SGPRs below).
 */
static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
					uint32_t ip_type,
					uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Command buffer in GTT, zero-filled so skipped dwords read as NOPs. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	/* Shader BO in VRAM. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
	CU_ASSERT_EQUAL(r, 0);

	/* Destination buffer the shader will fill. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/* Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Sets a range of pixel shader constants */
	/* This 0x22222222 pattern is the memset value verified below. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* Pad IB to an 8-dword boundary. */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader;
	resources[2] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	/* Safe to drop the list once the submission holds its references. */
	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memset test result meets with expected */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2300
/*
 * Submit a buffer-copy compute dispatch on @ip_type/@ring: the shader reads
 * a 16 KiB source buffer pre-filled with 0x55 and writes it to a destination
 * buffer.  The test passes when both buffers compare equal byte-for-byte.
 */
static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
					uint32_t ip_type,
					uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Command buffer in GTT, zero-filled. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	/* Shader BO in VRAM, loaded with the buffer-copy shader. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCOPY );
	CU_ASSERT_EQUAL(r, 0);

	/* Source buffer (same size as destination). */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	/* Destination buffer. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* Known pattern in the source to compare against after the copy. */
	memset(ptr_src, 0x55, bo_dst_size);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/* Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the texture resource constants data to the SGPRs */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_src;
	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* Pad IB to an 8-dword boundary. */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_shader;
	resources[1] = bo_src;
	resources[2] = bo_dst;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memcpy test result meets with expected */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
		i++;
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2453
amdgpu_compute_dispatch_test(void)2454 static void amdgpu_compute_dispatch_test(void)
2455 {
2456 int r;
2457 struct drm_amdgpu_info_hw_ip info;
2458 uint32_t ring_id;
2459
2460 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2461 CU_ASSERT_EQUAL(r, 0);
2462 if (!info.available_rings)
2463 printf("SKIP ... as there's no compute ring\n");
2464
2465 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2466 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2467 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2468 }
2469 }
2470
amdgpu_gfx_dispatch_test(void)2471 static void amdgpu_gfx_dispatch_test(void)
2472 {
2473 int r;
2474 struct drm_amdgpu_info_hw_ip info;
2475 uint32_t ring_id;
2476
2477 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2478 CU_ASSERT_EQUAL(r, 0);
2479 if (!info.available_rings)
2480 printf("SKIP ... as there's no graphics ring\n");
2481
2482 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2483 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2484 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2485 }
2486 }
2487
/*
 * Load ten copies of the requested gfx9 pixel shader into @ptr, one every
 * 256 bytes, then overwrite the patchable instruction slot of each copy
 * with the corresponding per-copy patch code.
 *
 * @ptr:     CPU mapping of the PS shader BO
 * @ps_type: PS_CONST or PS_TEX
 *
 * Returns 0 on success, -1 for an unknown shader type.
 */
static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
{
	int i;
	uint32_t shader_offset= 256;
	uint32_t mem_offset, patch_code_offset;
	uint32_t shader_size, patchinfo_code_size;
	const uint32_t *shader;
	const uint32_t *patchinfo_code;
	const uint32_t *patchcode_offset;

	switch (ps_type) {
	case PS_CONST:
		shader = ps_const_shader_gfx9;
		shader_size = sizeof(ps_const_shader_gfx9);
		patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
		patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
		patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
		break;
	case PS_TEX:
		shader = ps_tex_shader_gfx9;
		shader_size = sizeof(ps_tex_shader_gfx9);
		patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
		patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
		patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
		break;
	default:
		return -1;
		break;
	}

	/* write main shader program */
	/* Ten identical copies, 256 bytes apart. */
	for (i = 0 ; i < 10; i++) {
		mem_offset = i * shader_offset;
		memcpy(ptr + mem_offset, shader, shader_size);
	}

	/* overwrite patch codes */
	/* patchcode_offset[0] and patchinfo_code_size are in dwords;
	 * patchinfo_code is uint32_t*, so 'patchinfo_code +
	 * patch_code_offset' advances by dwords as well.  Presumably the
	 * patch-code table holds one patchinfo_code_size-dword entry per
	 * copy — TODO confirm against the shader table definitions. */
	for (i = 0 ; i < 10; i++) {
		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
		patch_code_offset = i * patchinfo_code_size;
		memcpy(ptr + mem_offset,
			patchinfo_code + patch_code_offset,
			patchinfo_code_size * sizeof(uint32_t));
	}

	return 0;
}
2535
2536 /* load RectPosTexFast_VS */
amdgpu_draw_load_vs_shader(uint8_t * ptr)2537 static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
2538 {
2539 const uint32_t *shader;
2540 uint32_t shader_size;
2541
2542 shader = vs_RectPosTexFast_shader_gfx9;
2543 shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
2544
2545 memcpy(ptr, shader, shader_size);
2546
2547 return 0;
2548 }
2549
amdgpu_draw_init(uint32_t * ptr)2550 static int amdgpu_draw_init(uint32_t *ptr)
2551 {
2552 int i = 0;
2553 const uint32_t *preamblecache_ptr;
2554 uint32_t preamblecache_size;
2555
2556 /* Write context control and load shadowing register if necessary */
2557 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2558 ptr[i++] = 0x80000000;
2559 ptr[i++] = 0x80000000;
2560
2561 preamblecache_ptr = preamblecache_gfx9;
2562 preamblecache_size = sizeof(preamblecache_gfx9);
2563
2564 memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
2565 return i + preamblecache_size/sizeof(uint32_t);
2566 }
2567
/*
 * Emit color-buffer and depth-buffer surface state for the draw/blt tests,
 * binding @dst_addr as render target 0.  Returns dwords written to @ptr.
 * Skipped payload dwords (i += N) stay zero because callers zero the
 * command buffer beforehand.
 */
static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
							 uint64_t dst_addr)
{
	int i = 0;

	/* setup color buffer */
	/* offset   reg
	   0xA318   CB_COLOR0_BASE
	   0xA319   CB_COLOR0_BASE_EXT
	   0xA31A   CB_COLOR0_ATTRIB2
	   0xA31B   CB_COLOR0_VIEW
	   0xA31C   CB_COLOR0_INFO
	   0xA31D   CB_COLOR0_ATTRIB
	   0xA31E   CB_COLOR0_DCC_CONTROL
	   0xA31F   CB_COLOR0_CMASK
	   0xA320   CB_COLOR0_CMASK_BASE_EXT
	   0xA321   CB_COLOR0_FMASK
	   0xA322   CB_COLOR0_FMASK_BASE_EXT
	   0xA323   CB_COLOR0_CLEAR_WORD0
	   0xA324   CB_COLOR0_CLEAR_WORD1
	   0xA325   CB_COLOR0_DCC_BASE
	   0xA326   CB_COLOR0_DCC_BASE_EXT */
	/* Base address is 256-byte aligned (>> 8 / >> 40); the remaining
	 * nine registers are left zero. */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
	ptr[i++] = 0x318;
	ptr[i++] = dst_addr >> 8;
	ptr[i++] = dst_addr >> 40;
	ptr[i++] = 0x7c01f;
	ptr[i++] = 0;
	ptr[i++] = 0x50438;
	ptr[i++] = 0x10140000;
	i += 9;

	/* mmCB_MRT0_EPITCH */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1e8;
	ptr[i++] = 0x1f;

	/* 0xA32B   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x32b;
	ptr[i++] = 0;

	/* 0xA33A   CB_COLOR1_BASE */
	/* NOTE(review): comment above repeats CB_COLOR1_BASE for offset
	 * 0x33a — presumably a later color target's base; confirm against
	 * the gfx9 register map. */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x33a;
	ptr[i++] = 0;

	/* SPI_SHADER_COL_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c5;
	ptr[i++] = 9;

	/* Setup depth buffer */
	/* mmDB_Z_INFO — both payload dwords left zero (depth disabled). */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0xe;
	i += 2;

	return i;
}
2628
/*
 * Emit rasterizer/scan-converter state for the draw/blt tests, then append
 * the cached gfx9 draw command stream.  Returns dwords written to @ptr.
 * Skipped payload dwords rely on the caller's zeroed command buffer.
 */
static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr)
{
	int i = 0;
	const uint32_t *cached_cmd_ptr;
	uint32_t cached_cmd_size;

	/* mmPA_SC_TILE_STEERING_OVERRIDE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 0;

	/* NOTE(review): a bare type-3 NOP followed by an opcode-0x10 header
	 * with no payload written here — intent unclear from this file. */
	ptr[i++] = 0xffff1000;
	ptr[i++] = 0xc0021000;

	/* mmPA_SC_TILE_STEERING_OVERRIDE written again, now with value 1. */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 1;

	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 — 16 zero payload dwords. */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
	ptr[i++] = 0x2fe;
	i += 16;

	/* mmPA_SC_CENTROID_PRIORITY_0 — 2 zero payload dwords. */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x2f5;
	i += 2;

	cached_cmd_ptr = cached_cmd_gfx9;
	cached_cmd_size = sizeof(cached_cmd_gfx9);

	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
	i += cached_cmd_size/sizeof(uint32_t);

	return i;
}
2665
/*
 * Program the RectPosTexFast vertex shader state: shader address, RSRC
 * registers, output formats and the rectangle/texcoord constants fed to
 * the VS through SH registers.  Returns dwords written to @ptr.  Skipped
 * payload dwords rely on the caller's zeroed command buffer.
 *
 * @ptr:         command buffer write cursor
 * @ps_type:     PS_CONST or PS_TEX (selects which VS constants are set)
 * @shader_addr: GPU address of the VS binary (256-byte aligned)
 */
static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
						  int ps_type,
						  uint64_t shader_addr)
{
	int i = 0;

	/* mmPA_CL_VS_OUT_CNTL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x207;
	ptr[i++] = 0;

	/* mmSPI_SHADER_PGM_RSRC3_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x46;
	ptr[i++] = 0xffff;

	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x48;
	ptr[i++] = shader_addr >> 8;
	ptr[i++] = shader_addr >> 40;

	/* mmSPI_SHADER_PGM_RSRC1_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4a;
	ptr[i++] = 0xc0081;
	/* mmSPI_SHADER_PGM_RSRC2_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4b;
	ptr[i++] = 0x18;

	/* mmSPI_VS_OUT_CONFIG */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1b1;
	ptr[i++] = 2;

	/* mmSPI_SHADER_POS_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c3;
	ptr[i++] = 4;

	/* VS user constants at SH reg 0x4c: two zero dwords then two
	 * 32.0f values (0x42000000) — presumably rectangle extents. */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x4c;
	i += 2;
	ptr[i++] = 0x42000000;
	ptr[i++] = 0x42000000;

	/* Constants at 0x50: PS_CONST leaves all four zero; PS_TEX writes
	 * two 1.0f values (0x3f800000) — presumably texcoord scale. */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x50;
	i += 2;
	if (ps_type == PS_CONST) {
		i += 2;
	} else if (ps_type == PS_TEX) {
		ptr[i++] = 0x3f800000;
		ptr[i++] = 0x3f800000;
	}

	/* Constants at 0x54: four zero dwords. */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x54;
	i += 4;

	return i;
}
2729
amdgpu_draw_ps_write2hw(uint32_t * ptr,int ps_type,uint64_t shader_addr)2730 static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
2731 int ps_type,
2732 uint64_t shader_addr)
2733 {
2734 int i, j;
2735 const uint32_t *sh_registers;
2736 const uint32_t *context_registers;
2737 uint32_t num_sh_reg, num_context_reg;
2738
2739 if (ps_type == PS_CONST) {
2740 sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
2741 context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
2742 num_sh_reg = ps_num_sh_registers_gfx9;
2743 num_context_reg = ps_num_context_registers_gfx9;
2744 } else if (ps_type == PS_TEX) {
2745 sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
2746 context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
2747 num_sh_reg = ps_num_sh_registers_gfx9;
2748 num_context_reg = ps_num_context_registers_gfx9;
2749 }
2750
2751 i = 0;
2752
2753 /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS
2754 0x2c08 SPI_SHADER_PGM_LO_PS
2755 0x2c09 SPI_SHADER_PGM_HI_PS */
2756 shader_addr += 256 * 9;
2757 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2758 ptr[i++] = 0x7;
2759 ptr[i++] = 0xffff;
2760 ptr[i++] = shader_addr >> 8;
2761 ptr[i++] = shader_addr >> 40;
2762
2763 for (j = 0; j < num_sh_reg; j++) {
2764 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2765 ptr[i++] = sh_registers[j * 2] - 0x2c00;
2766 ptr[i++] = sh_registers[j * 2 + 1];
2767 }
2768
2769 for (j = 0; j < num_context_reg; j++) {
2770 if (context_registers[j * 2] != 0xA1C5) {
2771 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2772 ptr[i++] = context_registers[j * 2] - 0xa000;
2773 ptr[i++] = context_registers[j * 2 + 1];
2774 }
2775
2776 if (context_registers[j * 2] == 0xA1B4) {
2777 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2778 ptr[i++] = 0x1b3;
2779 ptr[i++] = 2;
2780 }
2781 }
2782
2783 return i;
2784 }
2785
amdgpu_draw_draw(uint32_t * ptr)2786 static int amdgpu_draw_draw(uint32_t *ptr)
2787 {
2788 int i = 0;
2789
2790 /* mmIA_MULTI_VGT_PARAM */
2791 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2792 ptr[i++] = 0x40000258;
2793 ptr[i++] = 0xd00ff;
2794
2795 /* mmVGT_PRIMITIVE_TYPE */
2796 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2797 ptr[i++] = 0x10000242;
2798 ptr[i++] = 0x11;
2799
2800 ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
2801 ptr[i++] = 3;
2802 ptr[i++] = 2;
2803
2804 return i;
2805 }
2806
/*
 * Draw a full-screen rectangle with the constant-color pixel shader and
 * verify that every byte of the 16 KiB render target reads back as 0x33
 * (low byte of the 0x33333333 PS constant written below).
 *
 * @bo_shader_ps/@bo_shader_vs: pre-loaded shader BOs (owned by the caller)
 * @mc_address_shader_ps/vs:    their GPU addresses
 * @ring_id:                    GFX ring to submit on
 */
void amdgpu_memset_draw(amdgpu_device_handle device_handle,
			amdgpu_bo_handle bo_shader_ps,
			amdgpu_bo_handle bo_shader_vs,
			uint64_t mc_address_shader_ps,
			uint64_t mc_address_shader_vs,
			uint32_t ring_id)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_cmd;
	amdgpu_va_handle va_dst, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Command buffer in GTT, zero-filled so skipped dwords stay zero. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	/* Render target in VRAM. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);

	/* PS constant color 0x33333333 — this is the value verified below. */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* Pad IB to an 8-dword boundary. */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader_ps;
	resources[2] = bo_shader_vs;
	resources[3] = bo_cmd;
	/* Bug fix: the original passed a count of 3 for this 4-entry array,
	 * leaving bo_cmd out of the BO list. */
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring_id;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring_id;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memset test result meets with expected */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2919
/*
 * Allocate and load the constant-color pixel shader and the
 * RectPosTexFast vertex shader, run the memset draw test on @ring,
 * then release both shader BOs.
 */
static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
				    uint32_t ring)
{
	const int shader_bo_size = 4096;
	amdgpu_bo_handle ps_bo, vs_bo;
	void *ps_cpu;
	void *vs_cpu;
	uint64_t ps_gpu, vs_gpu;
	amdgpu_va_handle ps_va, vs_va;
	int r;

	/* Pixel shader BO in VRAM, zeroed before the shader upload. */
	r = amdgpu_bo_alloc_and_map(device_handle, shader_bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &ps_bo, &ps_cpu, &ps_gpu, &ps_va);
	CU_ASSERT_EQUAL(r, 0);
	memset(ps_cpu, 0, shader_bo_size);

	/* Vertex shader BO, likewise. */
	r = amdgpu_bo_alloc_and_map(device_handle, shader_bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &vs_bo, &vs_cpu, &vs_gpu, &vs_va);
	CU_ASSERT_EQUAL(r, 0);
	memset(vs_cpu, 0, shader_bo_size);

	r = amdgpu_draw_load_ps_shader(ps_cpu, PS_CONST);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(vs_cpu);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memset_draw(device_handle, ps_bo, vs_bo, ps_gpu, vs_gpu, ring);

	r = amdgpu_bo_unmap_and_free(ps_bo, ps_va, ps_gpu, shader_bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(vs_bo, vs_va, vs_gpu, shader_bo_size);
	CU_ASSERT_EQUAL(r, 0);
}
2960
/* GFX draw-engine memcpy test: builds an IB that draws a quad with the
 * PS_TEX pixel shader so bo_src is sampled and written to bo_dst, submits
 * it on the given ring, waits for the fence, and verifies every byte of
 * the destination matches the source pattern (0x55).
 * The PS_TEX and VS shaders must already be loaded into bo_shader_ps /
 * bo_shader_vs by the caller (see amdgpu_memcpy_draw_test).
 * Fix: dropped the unused locals hang_state/hangs. */
static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
			       amdgpu_bo_handle bo_shader_ps,
			       amdgpu_bo_handle bo_shader_vs,
			       uint64_t mc_address_shader_ps,
			       uint64_t mc_address_shader_vs,
			       uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	int i, r;
	int bo_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer in GTT so the CPU can write packets directly */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* fill the source surface with a known pattern */
	memset(ptr_src, 0x55, bo_size);

	/* build the IB: init, surface/state setup, VS + PS, then draw */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	/* SH registers describing the source surface: the BO address
	 * (mc_address_src) goes in at offset 0xc; the remaining magic
	 * dwords are presumably the texture descriptor — unverified */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x7c01f;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0x3e000;
	i += 3;

	/* second SH register block at offset 0x14 — presumably sampler
	 * state; NOTE(review): constants taken as-is, unverified */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB size up to a multiple of 8 dwords */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memcpy test result meets with expected */
	i = 0;
	while (i < bo_size) {
		CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
		i++;
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
3095
/* Allocate the TEX pixel shader and the vertex shader in VRAM, load
 * them, and run the draw-based memcpy test on the given GFX ring. */
static void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring)
{
	const int shader_size = 4096;
	amdgpu_bo_handle ps_bo, vs_bo;
	void *ps_ptr, *vs_ptr;
	uint64_t ps_mc, vs_mc;
	amdgpu_va_handle ps_va, vs_va;
	int r;

	/* pixel shader BO: zero it, then load the PS_TEX shader */
	r = amdgpu_bo_alloc_and_map(device_handle, shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &ps_bo, &ps_ptr, &ps_mc, &ps_va);
	CU_ASSERT_EQUAL(r, 0);
	memset(ps_ptr, 0, shader_size);
	r = amdgpu_draw_load_ps_shader(ps_ptr, PS_TEX);
	CU_ASSERT_EQUAL(r, 0);

	/* vertex shader BO: zero it, then load the VS shader */
	r = amdgpu_bo_alloc_and_map(device_handle, shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &vs_bo, &vs_ptr, &vs_mc, &vs_va);
	CU_ASSERT_EQUAL(r, 0);
	memset(vs_ptr, 0, shader_size);
	r = amdgpu_draw_load_vs_shader(vs_ptr);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memcpy_draw(device_handle, ps_bo, vs_bo, ps_mc, vs_mc, ring);

	r = amdgpu_bo_unmap_and_free(ps_bo, ps_va, ps_mc, shader_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(vs_bo, vs_va, vs_mc, shader_size);
	CU_ASSERT_EQUAL(r, 0);
}
3135
amdgpu_draw_test(void)3136 static void amdgpu_draw_test(void)
3137 {
3138 int r;
3139 struct drm_amdgpu_info_hw_ip info;
3140 uint32_t ring_id;
3141
3142 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
3143 CU_ASSERT_EQUAL(r, 0);
3144 if (!info.available_rings)
3145 printf("SKIP ... as there's no graphics ring\n");
3146
3147 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3148 amdgpu_memset_draw_test(device_handle, ring_id);
3149 amdgpu_memcpy_draw_test(device_handle, ring_id);
3150 }
3151 }
3152
amdgpu_gpu_reset_test(void)3153 static void amdgpu_gpu_reset_test(void)
3154 {
3155 int r;
3156 char debugfs_path[256], tmp[10];
3157 int fd;
3158 struct stat sbuf;
3159 amdgpu_context_handle context_handle;
3160 uint32_t hang_state, hangs;
3161
3162 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3163 CU_ASSERT_EQUAL(r, 0);
3164
3165 r = fstat(drm_amdgpu[0], &sbuf);
3166 CU_ASSERT_EQUAL(r, 0);
3167
3168 sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
3169 fd = open(debugfs_path, O_RDONLY);
3170 CU_ASSERT(fd >= 0);
3171
3172 r = read(fd, tmp, sizeof(tmp)/sizeof(char));
3173 CU_ASSERT(r > 0);
3174
3175 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3176 CU_ASSERT_EQUAL(r, 0);
3177 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3178
3179 close(fd);
3180 r = amdgpu_cs_ctx_free(context_handle);
3181 CU_ASSERT_EQUAL(r, 0);
3182
3183 amdgpu_compute_dispatch_test();
3184 amdgpu_gfx_dispatch_test();
3185 }
3186