/*
 * Copyright © 2019 Google LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_cs.h"
25 
26 /**
27  * Initialize a command stream.
28  */
29 void
tu_cs_init(struct tu_cs * cs,struct tu_device * device,enum tu_cs_mode mode,uint32_t initial_size)30 tu_cs_init(struct tu_cs *cs,
31            struct tu_device *device,
32            enum tu_cs_mode mode,
33            uint32_t initial_size)
34 {
35    assert(mode != TU_CS_MODE_EXTERNAL);
36 
37    memset(cs, 0, sizeof(*cs));
38 
39    cs->device = device;
40    cs->mode = mode;
41    cs->next_bo_size = initial_size;
42 }
43 
44 /**
45  * Initialize a command stream as a wrapper to an external buffer.
46  */
47 void
tu_cs_init_external(struct tu_cs * cs,struct tu_device * device,uint32_t * start,uint32_t * end)48 tu_cs_init_external(struct tu_cs *cs, struct tu_device *device,
49                     uint32_t *start, uint32_t *end)
50 {
51    memset(cs, 0, sizeof(*cs));
52 
53    cs->device = device;
54    cs->mode = TU_CS_MODE_EXTERNAL;
55    cs->start = cs->reserved_end = cs->cur = start;
56    cs->end = end;
57 }
58 
59 /**
60  * Finish and release all resources owned by a command stream.
61  */
62 void
tu_cs_finish(struct tu_cs * cs)63 tu_cs_finish(struct tu_cs *cs)
64 {
65    for (uint32_t i = 0; i < cs->bo_count; ++i) {
66       tu_bo_finish(cs->device, cs->bos[i]);
67       free(cs->bos[i]);
68    }
69 
70    free(cs->entries);
71    free(cs->bos);
72 }
73 
74 /**
75  * Get the offset of the command packets emitted since the last call to
76  * tu_cs_add_entry.
77  */
78 static uint32_t
tu_cs_get_offset(const struct tu_cs * cs)79 tu_cs_get_offset(const struct tu_cs *cs)
80 {
81    assert(cs->bo_count);
82    return cs->start - (uint32_t *) cs->bos[cs->bo_count - 1]->map;
83 }
84 
/*
 * Allocate and add a BO to a command stream.  Following command packets will
 * be emitted to the new BO.
 *
 * \a size is in dwords; the BO is created with size * sizeof(uint32_t)
 * bytes.  On success cs->start/cur/reserved_end/end are rewound to the new
 * BO's CPU mapping.  On failure the stream is left unchanged.
 */
static VkResult
tu_cs_add_bo(struct tu_cs *cs, uint32_t size)
{
   /* no BO for TU_CS_MODE_EXTERNAL */
   assert(cs->mode != TU_CS_MODE_EXTERNAL);

   /* no dangling command packet */
   assert(tu_cs_is_empty(cs));

   /* grow cs->bos if needed */
   if (cs->bo_count == cs->bo_capacity) {
      uint32_t new_capacity = MAX2(4, 2 * cs->bo_capacity);
      struct tu_bo **new_bos =
         realloc(cs->bos, new_capacity * sizeof(struct tu_bo *));
      if (!new_bos)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      cs->bo_capacity = new_capacity;
      cs->bos = new_bos;
   }

   struct tu_bo *new_bo = malloc(sizeof(struct tu_bo));
   if (!new_bo)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* NOTE(review): GPU_READ_ONLY presumably because the CP only reads the
    * stream, ALLOW_DUMP for crash-dump inclusion — confirm against tu_bo.
    */
   VkResult result =
      tu_bo_init_new(cs->device, new_bo, size * sizeof(uint32_t),
                     TU_BO_ALLOC_GPU_READ_ONLY | TU_BO_ALLOC_ALLOW_DUMP);
   if (result != VK_SUCCESS) {
      free(new_bo);
      return result;
   }

   result = tu_bo_map(cs->device, new_bo);
   if (result != VK_SUCCESS) {
      /* unwind the BO allocation; nothing was recorded in cs->bos yet */
      tu_bo_finish(cs->device, new_bo);
      free(new_bo);
      return result;
   }

   cs->bos[cs->bo_count++] = new_bo;

   /* emission continues at the start of the fresh BO */
   cs->start = cs->cur = cs->reserved_end = (uint32_t *) new_bo->map;
   cs->end = cs->start + new_bo->size / sizeof(uint32_t);

   return VK_SUCCESS;
}
136 
137 /**
138  * Reserve an IB entry.
139  */
140 static VkResult
tu_cs_reserve_entry(struct tu_cs * cs)141 tu_cs_reserve_entry(struct tu_cs *cs)
142 {
143    /* entries are only for TU_CS_MODE_GROW */
144    assert(cs->mode == TU_CS_MODE_GROW);
145 
146    /* grow cs->entries if needed */
147    if (cs->entry_count == cs->entry_capacity) {
148       uint32_t new_capacity = MAX2(4, cs->entry_capacity * 2);
149       struct tu_cs_entry *new_entries =
150          realloc(cs->entries, new_capacity * sizeof(struct tu_cs_entry));
151       if (!new_entries)
152          return VK_ERROR_OUT_OF_HOST_MEMORY;
153 
154       cs->entry_capacity = new_capacity;
155       cs->entries = new_entries;
156    }
157 
158    return VK_SUCCESS;
159 }
160 
161 /**
162  * Add an IB entry for the command packets emitted since the last call to this
163  * function.
164  */
165 static void
tu_cs_add_entry(struct tu_cs * cs)166 tu_cs_add_entry(struct tu_cs *cs)
167 {
168    /* entries are only for TU_CS_MODE_GROW */
169    assert(cs->mode == TU_CS_MODE_GROW);
170 
171    /* disallow empty entry */
172    assert(!tu_cs_is_empty(cs));
173 
174    /*
175     * because we disallow empty entry, tu_cs_add_bo and tu_cs_reserve_entry
176     * must both have been called
177     */
178    assert(cs->bo_count);
179    assert(cs->entry_count < cs->entry_capacity);
180 
181    /* add an entry for [cs->start, cs->cur] */
182    cs->entries[cs->entry_count++] = (struct tu_cs_entry) {
183       .bo = cs->bos[cs->bo_count - 1],
184       .size = tu_cs_get_size(cs) * sizeof(uint32_t),
185       .offset = tu_cs_get_offset(cs) * sizeof(uint32_t),
186    };
187 
188    cs->start = cs->cur;
189 }
190 
191 /**
192  * same behavior as tu_cs_emit_call but without the indirect
193  */
194 VkResult
tu_cs_add_entries(struct tu_cs * cs,struct tu_cs * target)195 tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target)
196 {
197    VkResult result;
198 
199    assert(cs->mode == TU_CS_MODE_GROW);
200    assert(target->mode == TU_CS_MODE_GROW);
201 
202    if (!tu_cs_is_empty(cs))
203       tu_cs_add_entry(cs);
204 
205    for (unsigned i = 0; i < target->entry_count; i++) {
206       result = tu_cs_reserve_entry(cs);
207       if (result != VK_SUCCESS)
208          return result;
209       cs->entries[cs->entry_count++] = target->entries[i];
210    }
211 
212    return VK_SUCCESS;
213 }
214 
/**
 * Begin (or continue) command packet emission.  This does nothing but sanity
 * checks currently.  \a cs must not be in TU_CS_MODE_SUB_STREAM mode.
 */
void
tu_cs_begin(struct tu_cs *cs)
{
   /* sub-streams are begun via tu_cs_begin_sub_stream instead */
   assert(cs->mode != TU_CS_MODE_SUB_STREAM);
   /* any packets from a previous begin/end pair must have been flushed */
   assert(tu_cs_is_empty(cs));
}
225 
226 /**
227  * End command packet emission.  This adds an IB entry when \a cs is in
228  * TU_CS_MODE_GROW mode.
229  */
230 void
tu_cs_end(struct tu_cs * cs)231 tu_cs_end(struct tu_cs *cs)
232 {
233    assert(cs->mode != TU_CS_MODE_SUB_STREAM);
234 
235    if (cs->mode == TU_CS_MODE_GROW && !tu_cs_is_empty(cs))
236       tu_cs_add_entry(cs);
237 }
238 
239 /**
240  * Begin command packet emission to a sub-stream.  \a cs must be in
241  * TU_CS_MODE_SUB_STREAM mode.
242  *
243  * Return \a sub_cs which is in TU_CS_MODE_EXTERNAL mode.  tu_cs_begin and
244  * tu_cs_reserve_space are implied and \a sub_cs is ready for command packet
245  * emission.
246  */
247 VkResult
tu_cs_begin_sub_stream(struct tu_cs * cs,uint32_t size,struct tu_cs * sub_cs)248 tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs)
249 {
250    assert(cs->mode == TU_CS_MODE_SUB_STREAM);
251    assert(size);
252 
253    VkResult result = tu_cs_reserve_space(cs, size);
254    if (result != VK_SUCCESS)
255       return result;
256 
257    tu_cs_init_external(sub_cs, cs->device, cs->cur, cs->reserved_end);
258    tu_cs_begin(sub_cs);
259    result = tu_cs_reserve_space(sub_cs, size);
260    assert(result == VK_SUCCESS);
261 
262    return VK_SUCCESS;
263 }
264 
/**
 * Allocate count*size dwords, aligned to size dwords.
 * \a cs must be in TU_CS_MODE_SUB_STREAM mode.
 *
 * On success \a memory receives the CPU mapping and GPU address (iova) of
 * the allocation.  When \a count is 0, returns VK_SUCCESS without touching
 * \a memory.
 */
VkResult
tu_cs_alloc(struct tu_cs *cs,
            uint32_t count,
            uint32_t size,
            struct tu_cs_memory *memory)
{
   assert(cs->mode == TU_CS_MODE_SUB_STREAM);
   /* size doubles as the alignment below; keep it bounded */
   assert(size && size <= 1024);

   if (!count)
      return VK_SUCCESS;

   /* TODO: smarter way to deal with alignment? */

   /* worst case needs size-1 extra dwords to reach the alignment */
   VkResult result = tu_cs_reserve_space(cs, count * size + (size-1));
   if (result != VK_SUCCESS)
      return result;

   struct tu_bo *bo = cs->bos[cs->bo_count - 1];
   /* dword offset of cs->start within the BO, rounded up to the alignment */
   size_t offset = align(tu_cs_get_offset(cs), size);

   /* NOTE(review): arithmetic on bo->map assumes void* arithmetic
    * (GCC/clang extension) or a byte-typed map — confirm against tu_bo.
    */
   memory->map = bo->map + offset * sizeof(uint32_t);
   memory->iova = bo->iova + offset * sizeof(uint32_t);

   /* skip past the allocation; later packets/allocations start after it */
   cs->start = cs->cur = (uint32_t*) bo->map + offset + count * size;

   return VK_SUCCESS;
}
298 
/**
 * End command packet emission to a sub-stream.  \a sub_cs becomes invalid
 * after this call.
 *
 * Return an IB entry for the sub-stream.  The entry has the same lifetime as
 * \a cs.
 */
struct tu_cs_entry
tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs)
{
   assert(cs->mode == TU_CS_MODE_SUB_STREAM);
   assert(cs->bo_count);
   /* sub_cs must be the stream handed out by tu_cs_begin_sub_stream and
    * must not have grown beyond the reserved range
    */
   assert(sub_cs->start == cs->cur && sub_cs->end == cs->reserved_end);
   tu_cs_sanity_check(sub_cs);

   tu_cs_end(sub_cs);

   /* absorb what the sub-stream emitted back into the parent */
   cs->cur = sub_cs->cur;

   /* the entry covers [cs->start, cs->cur) in the current BO */
   struct tu_cs_entry entry = {
      .bo = cs->bos[cs->bo_count - 1],
      .size = tu_cs_get_size(cs) * sizeof(uint32_t),
      .offset = tu_cs_get_offset(cs) * sizeof(uint32_t),
   };

   cs->start = cs->cur;

   return entry;
}
328 
/**
 * Reserve space from a command stream for \a reserved_size uint32_t values.
 * This never fails when \a cs has mode TU_CS_MODE_EXTERNAL.
 *
 * In growable modes this may switch to a new BO: pending packets are flushed
 * into an IB entry first, and an in-flight CP_COND_REG_EXEC condition is
 * closed in the old BO and re-emitted in the new one.
 */
VkResult
tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size)
{
   if (tu_cs_get_space(cs) < reserved_size) {
      if (cs->mode == TU_CS_MODE_EXTERNAL) {
         unreachable("cannot grow external buffer");
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }

      /* add an entry for the existing command packets */
      if (!tu_cs_is_empty(cs)) {
         /* no direct command packet for TU_CS_MODE_SUB_STREAM */
         assert(cs->mode != TU_CS_MODE_SUB_STREAM);

         tu_cs_add_entry(cs);
      }

      if (cs->cond_flags) {
         /* Close the open condition by patching its DWORDS field with the
          * number of dwords emitted under it in this BO.
          * Subtract one here to account for the DWORD field itself.
          */
         *cs->cond_dwords = cs->cur - cs->cond_dwords - 1;

         /* space for CP_COND_REG_EXEC in next bo */
         reserved_size += 3;
      }

      /* switch to a new BO */
      uint32_t new_size = MAX2(cs->next_bo_size, reserved_size);
      VkResult result = tu_cs_add_bo(cs, new_size);
      if (result != VK_SUCCESS)
         return result;

      /* if inside a condition, emit a new CP_COND_REG_EXEC */
      if (cs->cond_flags) {
         /* extend the reservation so the emits below stay inside it */
         cs->reserved_end = cs->cur + reserved_size;

         tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
         tu_cs_emit(cs, cs->cond_flags);

         /* remember where the DWORDS field lives so it can be patched on
          * the next BO switch (or when the condition ends)
          */
         cs->cond_dwords = cs->cur;

         /* Emit dummy DWORD field here */
         tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(0));
      }

      /* double the size for the next bo, also there is an upper
       * bound on IB size, which appears to be 0x0fffff
       */
      new_size = MIN2(new_size << 1, 0x0fffff);
      if (cs->next_bo_size < new_size)
         cs->next_bo_size = new_size;
   }

   assert(tu_cs_get_space(cs) >= reserved_size);
   cs->reserved_end = cs->cur + reserved_size;

   if (cs->mode == TU_CS_MODE_GROW) {
      /* reserve an entry for the next call to this function or tu_cs_end */
      return tu_cs_reserve_entry(cs);
   }

   return VK_SUCCESS;
}
395 
396 /**
397  * Reset a command stream to its initial state.  This discards all comand
398  * packets in \a cs, but does not necessarily release all resources.
399  */
400 void
tu_cs_reset(struct tu_cs * cs)401 tu_cs_reset(struct tu_cs *cs)
402 {
403    if (cs->mode == TU_CS_MODE_EXTERNAL) {
404       assert(!cs->bo_count && !cs->entry_count);
405       cs->reserved_end = cs->cur = cs->start;
406       return;
407    }
408 
409    for (uint32_t i = 0; i + 1 < cs->bo_count; ++i) {
410       tu_bo_finish(cs->device, cs->bos[i]);
411       free(cs->bos[i]);
412    }
413 
414    if (cs->bo_count) {
415       cs->bos[0] = cs->bos[cs->bo_count - 1];
416       cs->bo_count = 1;
417 
418       cs->start = cs->cur = cs->reserved_end = (uint32_t *) cs->bos[0]->map;
419       cs->end = cs->start + cs->bos[0]->size / sizeof(uint32_t);
420    }
421 
422    cs->entry_count = 0;
423 }
424