1 /*
2  * Copyright © 2019 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <fcntl.h>
25 #include <string.h>
26 #include <xf86drm.h>
27 
28 #include <gtest/gtest.h>
29 
30 #include "dev/intel_device_info.h"
31 #include "drm-uapi/i915_drm.h"
32 #include "genxml/gen_macros.h"
33 #include "util/macros.h"
34 
35 class mi_builder_test;
36 
37 struct address {
38    uint32_t gem_handle;
39    uint32_t offset;
40 };
41 
42 #define __gen_address_type struct address
43 #define __gen_user_data ::mi_builder_test
44 
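/* Hooks expected by the genxml pack functions and by mi_builder: they turn an
 * (address, delta) pair into the value written into the batch (or record a
 * relocation on pre-gfx8 platforms), hand out space in the batch buffer, and
 * report the batch address of a given location.  The definitions follow the
 * test class below.
 */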
45 uint64_t __gen_combine_address(mi_builder_test *test, void *location,
46                                struct address addr, uint32_t delta);
47 void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
48 struct address __gen_get_batch_address(mi_builder_test *test,
49                                        void *location);
50 
51 struct address
52 __gen_address_offset(address addr, uint64_t offset)
53 {
54    addr.offset += offset;
55    return addr;
56 }
57 
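/* Scratch register used by the register load/store tests.  On HSW+ we borrow
 * ALU GPR15, which stays free because the builder only allocates GPRs 0-14
 * (MI_BUILDER_NUM_ALLOC_GPRS); earlier gens have no MI_MATH GPRs, so an
 * otherwise unused 3DPRIM state register stands in for it.
 */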
58 #if GFX_VERx10 >= 75
59 #define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
60 #else
61 #define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */
62 #endif
63 #define MI_BUILDER_NUM_ALLOC_GPRS 15
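/* Layout of the 4 KiB data BO: test inputs live at offset 0 and the GPU
 * writes its results starting at offset 2048.
 */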
64 #define INPUT_DATA_OFFSET 0
65 #define OUTPUT_DATA_OFFSET 2048
66 
67 #define __genxml_cmd_length(cmd) cmd ## _length
68 #define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
69 #define __genxml_cmd_header(cmd) cmd ## _header
70 #define __genxml_cmd_pack(cmd) cmd ## _pack
71 
72 #include "genxml/genX_pack.h"
73 #include "mi_builder.h"
74 
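/* Emit a raw genxml command into the batch.  The for-statement trick lets the
 * caller fill out the command struct in the loop body; the struct is packed
 * into freshly allocated batch dwords once the body completes.
 */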
75 #define emit_cmd(cmd, name)                                           \
76    for (struct cmd name = { __genxml_cmd_header(cmd) },               \
77         *_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \
78         __builtin_expect(_dst != NULL, 1);                            \
79         __genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)
80 
81 #include <vector>
82 
83 class mi_builder_test : public ::testing::Test {
84 public:
85    mi_builder_test();
86    ~mi_builder_test();
87 
88    void SetUp();
89 
90    void *emit_dwords(int num_dwords);
91    void submit_batch();
92 
93    inline address in_addr(uint32_t offset)
94    {
95       address addr;
96       addr.gem_handle = data_bo_handle;
97       addr.offset = INPUT_DATA_OFFSET + offset;
98       return addr;
99    }
100 
101    inline address out_addr(uint32_t offset)
102    {
103       address addr;
104       addr.gem_handle = data_bo_handle;
105       addr.offset = OUTPUT_DATA_OFFSET + offset;
106       return addr;
107    }
108 
109    inline mi_value in_mem64(uint32_t offset)
110    {
111       return mi_mem64(in_addr(offset));
112    }
113 
114    inline mi_value in_mem32(uint32_t offset)
115    {
116       return mi_mem32(in_addr(offset));
117    }
118 
119    inline mi_value out_mem64(uint32_t offset)
120    {
121       return mi_mem64(out_addr(offset));
122    }
123 
124    inline mi_value out_mem32(uint32_t offset)
125    {
126       return mi_mem32(out_addr(offset));
127    }
128 
129    int fd;
130    int ctx_id;
131    intel_device_info devinfo;
132 
133    uint32_t batch_bo_handle;
134 #if GFX_VER >= 8
135    uint64_t batch_bo_addr;
136 #endif
137    uint32_t batch_offset;
138    void *batch_map;
139 
140 #if GFX_VER < 8
141    std::vector<drm_i915_gem_relocation_entry> relocs;
142 #endif
143 
144    uint32_t data_bo_handle;
145 #if GFX_VER >= 8
146    uint64_t data_bo_addr;
147 #endif
148    void *data_map;
149    char *input;
150    char *output;
151    uint64_t canary;
152 
153    mi_builder b;
154 };
155 
156 mi_builder_test::mi_builder_test() :
157   fd(-1)
158 { }
159 
160 mi_builder_test::~mi_builder_test()
161 {
162    close(fd);
163 }
164 
165 // 1 MB of batch should be enough for anyone, right?
166 #define BATCH_BO_SIZE (256 * 4096)
167 #define DATA_BO_SIZE 4096
168 
169 void
170 mi_builder_test::SetUp()
171 {
172    drmDevicePtr devices[8];
173    int max_devices = drmGetDevices2(0, devices, 8);
174 
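   /* Probe the DRM devices for an Intel render node whose generation matches
    * the GFX_VER this test binary was built for.
    */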
175    int i;
176    for (i = 0; i < max_devices; i++) {
177       if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
178           devices[i]->bustype == DRM_BUS_PCI &&
179           devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
180          fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
181          if (fd < 0)
182             continue;
183 
184          /* We don't really need to do this when running on hardware because
185           * we can just pull it from the drmDevice.  However, without doing
186           * this, intel_dump_gpu gets a bit of heartburn and we can't use the
187           * --device option with it.
188           */
189          int device_id;
190          drm_i915_getparam getparam = drm_i915_getparam();
191          getparam.param = I915_PARAM_CHIPSET_ID;
192          getparam.value = &device_id;
193          ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
194                             (void *)&getparam), 0) << strerror(errno);
195 
196          ASSERT_TRUE(intel_get_device_info_from_pci_id(device_id, &devinfo));
197          if (devinfo.ver != GFX_VER || devinfo.is_haswell != (GFX_VERx10 == 75)) {
198             close(fd);
199             fd = -1;
200             continue;
201          }
202 
203 
204          /* Found a device! */
205          break;
206       }
207    }
208    ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";
209 
210    drm_i915_gem_context_create ctx_create = drm_i915_gem_context_create();
211    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE,
212                       (void *)&ctx_create), 0) << strerror(errno);
213    ctx_id = ctx_create.ctx_id;
214 
215    if (GFX_VER >= 8) {
216       /* On gfx8+, we require softpin */
217       int has_softpin;
218       drm_i915_getparam getparam = drm_i915_getparam();
219       getparam.param = I915_PARAM_HAS_EXEC_SOFTPIN;
220       getparam.value = &has_softpin;
221       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
222                          (void *)&getparam), 0) << strerror(errno);
223       ASSERT_TRUE(has_softpin);
224    }
225 
226    // Create the batch buffer
227    drm_i915_gem_create gem_create = drm_i915_gem_create();
228    gem_create.size = BATCH_BO_SIZE;
229    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
230                       (void *)&gem_create), 0) << strerror(errno);
231    batch_bo_handle = gem_create.handle;
232 #if GFX_VER >= 8
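   /* Arbitrary fixed GPU virtual address; on gfx8+ every BO is softpinned
    * (EXEC_OBJECT_PINNED) at the address chosen here.
    */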
233    batch_bo_addr = 0xffffffffdff70000ULL;
234 #endif
235 
236    drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
237    gem_caching.handle = batch_bo_handle;
238    gem_caching.caching = I915_CACHING_CACHED;
239    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
240                       (void *)&gem_caching), 0) << strerror(errno);
241 
242    drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
243    gem_mmap.handle = batch_bo_handle;
244    gem_mmap.offset = 0;
245    gem_mmap.size = BATCH_BO_SIZE;
246    gem_mmap.flags = 0;
247    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
248                       (void *)&gem_mmap), 0) << strerror(errno);
249    batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
250 
251    // Start the batch at zero
252    batch_offset = 0;
253 
254    // Create the data buffer
255    gem_create = drm_i915_gem_create();
256    gem_create.size = DATA_BO_SIZE;
257    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
258                       (void *)&gem_create), 0) << strerror(errno);
259    data_bo_handle = gem_create.handle;
260 #if GFX_VER >= 8
261    data_bo_addr = 0xffffffffefff0000ULL;
262 #endif
263 
264    gem_caching = drm_i915_gem_caching();
265    gem_caching.handle = data_bo_handle;
266    gem_caching.caching = I915_CACHING_CACHED;
267    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
268                       (void *)&gem_caching), 0) << strerror(errno);
269 
270    gem_mmap = drm_i915_gem_mmap();
271    gem_mmap.handle = data_bo_handle;
272    gem_mmap.offset = 0;
273    gem_mmap.size = DATA_BO_SIZE;
274    gem_mmap.flags = 0;
275    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
276                       (void *)&gem_mmap), 0) << strerror(errno);
277    data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
278    input = (char *)data_map + INPUT_DATA_OFFSET;
279    output = (char *)data_map + OUTPUT_DATA_OFFSET;
280 
281    // Fill the test data with garbage
282    memset(data_map, 139, DATA_BO_SIZE);
283    memset(&canary, 139, sizeof(canary));
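   /* 139 == 0x8b, so the output buffer and the canary both hold the pattern
    * 0x8b8b8b8b...; tests compare against the canary to verify that narrow
    * stores don't clobber neighboring bytes.
    */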
284 
285    mi_builder_init(&b, &devinfo, this);
286 }
287 
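/* Trivial bump allocator for batch space; nothing is ever freed, which is why
 * the batch BO is sized so generously.
 */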
288 void *
289 mi_builder_test::emit_dwords(int num_dwords)
290 {
291    void *ptr = (void *)((char *)batch_map + batch_offset);
292    batch_offset += num_dwords * 4;
293    assert(batch_offset < BATCH_BO_SIZE);
294    return ptr;
295 }
296 
297 void
298 mi_builder_test::submit_batch()
299 {
300    mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);
301 
302    // Round batch up to an even number of dwords.
303    if (batch_offset & 4)
304       mi_builder_emit(&b, GENX(MI_NOOP), noop);
305 
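   /* Two-entry execbuf list: the data BO at index 0 and the batch BO at
    * index 1.  With I915_EXEC_HANDLE_LUT set below, relocations refer to
    * these objects by list index, which matches what __gen_combine_address
    * records in target_handle.
    */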
306    drm_i915_gem_exec_object2 objects[2];
307    memset(objects, 0, sizeof(objects));
308 
309    objects[0].handle = data_bo_handle;
310    objects[0].relocation_count = 0;
311    objects[0].relocs_ptr = 0;
312 #if GFX_VER >= 8 /* On gfx8+, we pin everything */
313    objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
314                       EXEC_OBJECT_PINNED |
315                       EXEC_OBJECT_WRITE;
316    objects[0].offset = data_bo_addr;
317 #else
318    objects[0].flags = EXEC_OBJECT_WRITE;
319    objects[0].offset = -1;
320 #endif
321 
322    objects[1].handle = batch_bo_handle;
323 #if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
324    objects[1].relocation_count = 0;
325    objects[1].relocs_ptr = 0;
326    objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
327                       EXEC_OBJECT_PINNED;
328    objects[1].offset = batch_bo_addr;
329 #else
330    objects[1].relocation_count = relocs.size();
331    objects[1].relocs_ptr = (uintptr_t)(void *)&relocs[0];
332    objects[1].flags = 0;
333    objects[1].offset = -1;
334 #endif
335 
336    drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
337    execbuf.buffers_ptr = (uintptr_t)(void *)objects;
338    execbuf.buffer_count = 2;
339    execbuf.batch_start_offset = 0;
340    execbuf.batch_len = batch_offset;
341    execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
342    execbuf.rsvd1 = ctx_id;
343 
344    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
345                       (void *)&execbuf), 0) << strerror(errno);
346 
347    drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
348    gem_wait.bo_handle = batch_bo_handle;
349    gem_wait.timeout_ns = INT64_MAX;
350    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
351                       (void *)&gem_wait), 0) << strerror(errno);
352 }
353 
354 uint64_t
355 __gen_combine_address(mi_builder_test *test, void *location,
356                       address addr, uint32_t delta)
357 {
358 #if GFX_VER >= 8
359    uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?
360                        test->data_bo_addr : test->batch_bo_addr;
361    return addr_u64 + addr.offset + delta;
362 #else
363    drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
364    reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
365    reloc.delta = addr.offset + delta;
366    reloc.offset = (char *)location - (char *)test->batch_map;
367    reloc.presumed_offset = -1;
368    test->relocs.push_back(reloc);
369 
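   /* presumed_offset == -1 forces the kernel to patch this location at
    * execbuf time, so the value returned here is only a placeholder.
    */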
370    return reloc.delta;
371 #endif
372 }
373 
374 void *
375 __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
376 {
377    return test->emit_dwords(num_dwords);
378 }
379 
380 struct address
381 __gen_get_batch_address(mi_builder_test *test, void *location)
382 {
383    assert(location >= test->batch_map);
384    size_t offset = (char *)location - (char *)test->batch_map;
385    assert(offset < BATCH_BO_SIZE);
386    assert(offset <= UINT32_MAX);
387 
388    return (struct address) {
389       .gem_handle = test->batch_bo_handle,
390       .offset = (uint32_t)offset,
391    };
392 }
393 
394 #include "genxml/genX_pack.h"
395 #include "mi_builder.h"
396 
397 TEST_F(mi_builder_test, imm_mem)
398 {
399    const uint64_t value = 0x0123456789abcdef;
400 
401    mi_store(&b, out_mem64(0), mi_imm(value));
402    mi_store(&b, out_mem32(8), mi_imm(value));
403 
404    submit_batch();
405 
406    // 64 -> 64
407    EXPECT_EQ(*(uint64_t *)(output + 0),  value);
408 
409    // 64 -> 32
410    EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
411    EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
412 }
413 
414 /* mem -> mem copies are only supported on HSW+ */
415 #if GFX_VERx10 >= 75
416 TEST_F(mi_builder_test, mem_mem)
417 {
418    const uint64_t value = 0x0123456789abcdef;
419    *(uint64_t *)input = value;
420 
421    mi_store(&b, out_mem64(0),   in_mem64(0));
422    mi_store(&b, out_mem32(8),   in_mem64(0));
423    mi_store(&b, out_mem32(16),  in_mem32(0));
424    mi_store(&b, out_mem64(24),  in_mem32(0));
425 
426    submit_batch();
427 
428    // 64 -> 64
429    EXPECT_EQ(*(uint64_t *)(output + 0),  value);
430 
431    // 64 -> 32
432    EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
433    EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
434 
435    // 32 -> 32
436    EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
437    EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);
438 
439    // 32 -> 64
440    EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
441 }
442 #endif
443 
444 TEST_F(mi_builder_test, imm_reg)
445 {
446    const uint64_t value = 0x0123456789abcdef;
447 
448    mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
449    mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value));
450    mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));
451 
452    mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
453    mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value));
454    mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));
455 
456    submit_batch();
457 
458    // 64 -> 64
459    EXPECT_EQ(*(uint64_t *)(output + 0),  value);
460 
461    // 64 -> 32
462    EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
463    EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
464 }
465 
466 TEST_F(mi_builder_test, mem_reg)
467 {
468    const uint64_t value = 0x0123456789abcdef;
469    *(uint64_t *)input = value;
470 
471    mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
472    mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
473    mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));
474 
475    mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
476    mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
477    mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));
478 
479    mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
480    mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
481    mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));
482 
483    mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
484    mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
485    mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));
486 
487    submit_batch();
488 
489    // 64 -> 64
490    EXPECT_EQ(*(uint64_t *)(output + 0),  value);
491 
492    // 64 -> 32
493    EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
494    EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
495 
496    // 32 -> 32
497    EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
498    EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);
499 
500    // 32 -> 64
501    EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
502 }
503 
504 TEST_F(mi_builder_test, memset)
505 {
506    const unsigned memset_size = 256;
507 
508    mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);
509 
510    submit_batch();
511 
512    uint32_t *out_u32 = (uint32_t *)output;
513    for (unsigned i = 0; i <  memset_size / sizeof(*out_u32); i++)
514       EXPECT_EQ(out_u32[i], 0xdeadbeef);
515 }
516 
517 TEST_F(mi_builder_test, memcpy)
518 {
519    const unsigned memcpy_size = 256;
520 
521    uint8_t *in_u8 = (uint8_t *)input;
522    for (unsigned i = 0; i < memcpy_size; i++)
523       in_u8[i] = i;
524 
525    mi_memcpy(&b, out_addr(0), in_addr(0), 256);
526 
527    submit_batch();
528 
529    uint8_t *out_u8 = (uint8_t *)output;
530    for (unsigned i = 0; i < memcpy_size; i++)
531       EXPECT_EQ(out_u8[i], i);
532 }
533 
534 /* Start of MI_MATH section */
535 #if GFX_VERx10 >= 75
536 
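/* Compare a value the GPU wrote against the same expression evaluated on the
 * CPU: with immediate operands the builder constant-folds the whole
 * expression, so mi_value_to_u64() yields the reference result.
 */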
537 #define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))
538 
539 TEST_F(mi_builder_test, inot)
540 {
541    const uint64_t value = 0x0123456789abcdef;
542    const uint32_t value_lo = (uint32_t)value;
543    const uint32_t value_hi = (uint32_t)(value >> 32);
544    memcpy(input, &value, sizeof(value));
545 
546    mi_store(&b, out_mem64(0),  mi_inot(&b, in_mem64(0)));
547    mi_store(&b, out_mem64(8),  mi_inot(&b, mi_inot(&b, in_mem64(0))));
548    mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));
549    mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));
550    mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));
551    mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));
552    mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));
553    mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));
554 
555    submit_batch();
556 
557    EXPECT_EQ(*(uint64_t *)(output + 0),  ~value);
558    EXPECT_EQ(*(uint64_t *)(output + 8),  value);
559    EXPECT_EQ(*(uint64_t *)(output + 16), ~(uint64_t)value_lo);
560    EXPECT_EQ(*(uint64_t *)(output + 24), ~(uint64_t)value_hi);
561    EXPECT_EQ(*(uint32_t *)(output + 32), (uint32_t)~value);
562    EXPECT_EQ(*(uint32_t *)(output + 36), (uint32_t)~value_lo);
563    EXPECT_EQ(*(uint32_t *)(output + 40), (uint32_t)value_lo);
564    EXPECT_EQ(*(uint32_t *)(output + 44), (uint32_t)~value_hi);
565 }
566 
567 /* Test adding of immediates of all kinds including
568  *
569  *  - All zeroes
570  *  - All ones
571  *  - inverted constants
572  */
573 TEST_F(mi_builder_test, add_imm)
574 {
575    const uint64_t value = 0x0123456789abcdef;
576    const uint64_t add = 0xdeadbeefac0ffee2;
577    memcpy(input, &value, sizeof(value));
578 
579    mi_store(&b, out_mem64(0),
580                 mi_iadd(&b, in_mem64(0), mi_imm(0)));
581    mi_store(&b, out_mem64(8),
582                 mi_iadd(&b, in_mem64(0), mi_imm(-1)));
583    mi_store(&b, out_mem64(16),
584                 mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
585    mi_store(&b, out_mem64(24),
586                 mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
587    mi_store(&b, out_mem64(32),
588                 mi_iadd(&b, in_mem64(0), mi_imm(add)));
589    mi_store(&b, out_mem64(40),
590                 mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
591    mi_store(&b, out_mem64(48),
592                 mi_iadd(&b, mi_imm(0), in_mem64(0)));
593    mi_store(&b, out_mem64(56),
594                 mi_iadd(&b, mi_imm(-1), in_mem64(0)));
595    mi_store(&b, out_mem64(64),
596                 mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
597    mi_store(&b, out_mem64(72),
598                 mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
599    mi_store(&b, out_mem64(80),
600                 mi_iadd(&b, mi_imm(add), in_mem64(0)));
601    mi_store(&b, out_mem64(88),
602                 mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));
603 
604    // And some add_imm just for good measure
605    mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
606    mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));
607 
608    submit_batch();
609 
610    EXPECT_EQ(*(uint64_t *)(output + 0),   value);
611    EXPECT_EQ(*(uint64_t *)(output + 8),   value - 1);
612    EXPECT_EQ(*(uint64_t *)(output + 16),  value - 1);
613    EXPECT_EQ(*(uint64_t *)(output + 24),  value);
614    EXPECT_EQ(*(uint64_t *)(output + 32),  value + add);
615    EXPECT_EQ(*(uint64_t *)(output + 40),  value + ~add);
616    EXPECT_EQ(*(uint64_t *)(output + 48),  value);
617    EXPECT_EQ(*(uint64_t *)(output + 56),  value - 1);
618    EXPECT_EQ(*(uint64_t *)(output + 64),  value - 1);
619    EXPECT_EQ(*(uint64_t *)(output + 72),  value);
620    EXPECT_EQ(*(uint64_t *)(output + 80),  value + add);
621    EXPECT_EQ(*(uint64_t *)(output + 88),  value + ~add);
622    EXPECT_EQ(*(uint64_t *)(output + 96),  value);
623    EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
624 }
625 
626 TEST_F(mi_builder_test, ult_uge_ieq_ine)
627 {
628    uint64_t values[8] = {
629       0x0123456789abcdef,
630       0xdeadbeefac0ffee2,
631       (uint64_t)-1,
632       1,
633       0,
634       1049571,
635       (uint64_t)-240058,
636       20204184,
637    };
638    memcpy(input, values, sizeof(values));
639 
640    for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
641       for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
642          mi_store(&b, out_mem64(i * 256 + j * 32 + 0),
643                       mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
644          mi_store(&b, out_mem64(i * 256 + j * 32 + 8),
645                       mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
646          mi_store(&b, out_mem64(i * 256 + j * 32 + 16),
647                       mi_ieq(&b, in_mem64(i * 8), in_mem64(j * 8)));
648          mi_store(&b, out_mem64(i * 256 + j * 32 + 24),
649                       mi_ine(&b, in_mem64(i * 8), in_mem64(j * 8)));
650       }
651    }
652 
653    submit_batch();
654 
655    for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
656       for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
657          uint64_t *out_u64 = (uint64_t *)(output + i * 256 + j * 32);
658          EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]),
659                                               mi_imm(values[j])));
660          EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]),
661                                               mi_imm(values[j])));
662          EXPECT_EQ_IMM(out_u64[2], mi_ieq(&b, mi_imm(values[i]),
663                                               mi_imm(values[j])));
664          EXPECT_EQ_IMM(out_u64[3], mi_ine(&b, mi_imm(values[i]),
665                                               mi_imm(values[j])));
666       }
667    }
668 }
669 
670 TEST_F(mi_builder_test, z_nz)
671 {
672    uint64_t values[8] = {
673       0,
674       1,
675       UINT32_MAX,
676       (uint64_t)UINT32_MAX + 1, /* 2^32; plain UINT32_MAX + 1 wraps to 0 */
677       UINT64_MAX,
678    };
679    memcpy(input, values, sizeof(values));
680 
681    for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
682       mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
683       mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
684    }
685 
686    submit_batch();
687 
688    for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
689       uint64_t *out_u64 = (uint64_t *)(output + i * 16);
690       EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
691       EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
692    }
693 }
694 
695 TEST_F(mi_builder_test, iand)
696 {
697    const uint64_t values[2] = {
698       0x0123456789abcdef,
699       0xdeadbeefac0ffee2,
700    };
701    memcpy(input, values, sizeof(values));
702 
703    mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));
704 
705    submit_batch();
706 
707    EXPECT_EQ_IMM(*(uint64_t *)output, mi_iand(&b, mi_imm(values[0]),
708                                                   mi_imm(values[1])));
709 }
710 
711 #if GFX_VERx10 >= 125
712 TEST_F(mi_builder_test, ishl)
713 {
714    const uint64_t value = 0x0123456789abcdef;
715    memcpy(input, &value, sizeof(value));
716 
717    uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
718    memcpy(input + 8, shifts, sizeof(shifts));
719 
720    for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
721       mi_store(&b, out_mem64(i * 8),
722                    mi_ishl(&b, in_mem64(0), in_mem32(8 + i * 4)));
723    }
724 
725    submit_batch();
726 
727    for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
728       EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
729                     mi_ishl(&b, mi_imm(value), mi_imm(shifts[i])));
730    }
731 }
732 
733 TEST_F(mi_builder_test, ushr)
734 {
735    const uint64_t value = 0x0123456789abcdef;
736    memcpy(input, &value, sizeof(value));
737 
738    uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
739    memcpy(input + 8, shifts, sizeof(shifts));
740 
741    for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
742       mi_store(&b, out_mem64(i * 8),
743                    mi_ushr(&b, in_mem64(0), in_mem32(8 + i * 4)));
744    }
745 
746    submit_batch();
747 
748    for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
749       EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
750                     mi_ushr(&b, mi_imm(value), mi_imm(shifts[i])));
751    }
752 }
753 
754 TEST_F(mi_builder_test, ushr_imm)
755 {
756    const uint64_t value = 0x0123456789abcdef;
757    memcpy(input, &value, sizeof(value));
758 
759    const unsigned max_shift = 64;
760 
761    for (unsigned i = 0; i <= max_shift; i++)
762       mi_store(&b, out_mem64(i * 8), mi_ushr_imm(&b, in_mem64(0), i));
763 
764    submit_batch();
765 
766    for (unsigned i = 0; i <= max_shift; i++) {
767       EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
768                     mi_ushr_imm(&b, mi_imm(value), i));
769    }
770 }
771 
772 TEST_F(mi_builder_test, ishr)
773 {
774    const uint64_t values[] = {
775       0x0123456789abcdef,
776       0xfedcba9876543210,
777    };
778    memcpy(input, values, sizeof(values));
779 
780    uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
781    memcpy(input + 16, shifts, sizeof(shifts));
782 
783    for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
784       for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
785          mi_store(&b, out_mem64(i * 8 + j * 16),
786                       mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4)));
787       }
788    }
789 
790    submit_batch();
791 
792    for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
793       for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
794          EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16),
795                        mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j])));
796       }
797    }
798 }
799 
800 TEST_F(mi_builder_test, ishr_imm)
801 {
802    const uint64_t value = 0x0123456789abcdef;
803    memcpy(input, &value, sizeof(value));
804 
805    const unsigned max_shift = 64;
806 
807    for (unsigned i = 0; i <= max_shift; i++)
808       mi_store(&b, out_mem64(i * 8), mi_ishr_imm(&b, in_mem64(0), i));
809 
810    submit_batch();
811 
812    for (unsigned i = 0; i <= max_shift; i++) {
813       EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
814                     mi_ishr_imm(&b, mi_imm(value), i));
815    }
816 }
817 #endif /* if GFX_VERx10 >= 125 */
818 
819 TEST_F(mi_builder_test, imul_imm)
820 {
821    uint64_t lhs[2] = {
822       0x0123456789abcdef,
823       0xdeadbeefac0ffee2,
824    };
825    memcpy(input, lhs, sizeof(lhs));
826 
827     /* Some random 32-bit unsigned integers.  The first four have been
828      * hand-chosen just to ensure some good low integers; the rest were
829      * generated with a python script.
830      */
831    uint32_t rhs[20] = {
832       1,       2,       3,       5,
833       10800,   193,     64,      40,
834       3796,    256,     88,      473,
835       1421,    706,     175,     850,
836       39,      38985,   1941,    17,
837    };
838 
839    for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
840       for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
841          mi_store(&b, out_mem64(i * 160 + j * 8),
842                       mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
843       }
844    }
845 
846    submit_batch();
847 
848    for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
849       for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
850          EXPECT_EQ_IMM(*(uint64_t *)(output + i * 160 + j * 8),
851                        mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j]));
852       }
853    }
854 }
855 
856 TEST_F(mi_builder_test, ishl_imm)
857 {
858    const uint64_t value = 0x0123456789abcdef;
859    memcpy(input, &value, sizeof(value));
860 
861    const unsigned max_shift = 64;
862 
863    for (unsigned i = 0; i <= max_shift; i++)
864       mi_store(&b, out_mem64(i * 8), mi_ishl_imm(&b, in_mem64(0), i));
865 
866    submit_batch();
867 
868    for (unsigned i = 0; i <= max_shift; i++) {
869       EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
870                     mi_ishl_imm(&b, mi_imm(value), i));
871    }
872 }
873 
874 TEST_F(mi_builder_test, ushr32_imm)
875 {
876    const uint64_t value = 0x0123456789abcdef;
877    memcpy(input, &value, sizeof(value));
878 
879    const unsigned max_shift = 64;
880 
881    for (unsigned i = 0; i <= max_shift; i++)
882       mi_store(&b, out_mem64(i * 8), mi_ushr32_imm(&b, in_mem64(0), i));
883 
884    submit_batch();
885 
886    for (unsigned i = 0; i <= max_shift; i++) {
887       EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
888                     mi_ushr32_imm(&b, mi_imm(value), i));
889    }
890 }
891 
892 TEST_F(mi_builder_test, udiv32_imm)
893 {
894     /* Some random 32-bit unsigned integers.  The first four have been
895      * hand-chosen just to ensure some good low integers; the rest were
896      * generated with a python script.
897      */
898    uint32_t values[20] = {
899       1,       2,       3,       5,
900       10800,   193,     64,      40,
901       3796,    256,     88,      473,
902       1421,    706,     175,     850,
903       39,      38985,   1941,    17,
904    };
905    memcpy(input, values, sizeof(values));
906 
907    for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
908       for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
909          mi_store(&b, out_mem32(i * 80 + j * 4),
910                       mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
911       }
912    }
913 
914    submit_batch();
915 
916    for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
917       for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
918          EXPECT_EQ_IMM(*(uint32_t *)(output + i * 80 + j * 4),
919                        mi_udiv32_imm(&b, mi_imm(values[i]), values[j]));
920       }
921    }
922 }
923 
924 TEST_F(mi_builder_test, store_if)
925 {
926    uint64_t u64 = 0xb453b411deadc0deull;
927    uint32_t u32 = 0x1337d00d;
928 
929    /* Write values with the predicate enabled */
930    emit_cmd(GENX(MI_PREDICATE), mip) {
931       mip.LoadOperation    = LOAD_LOAD;
932       mip.CombineOperation = COMBINE_SET;
933       mip.CompareOperation = COMPARE_TRUE;
934    }
935 
936    mi_store_if(&b, out_mem64(0), mi_imm(u64));
937    mi_store_if(&b, out_mem32(8), mi_imm(u32));
938 
939    /* Set predicate to false, write garbage that shouldn't land */
940    emit_cmd(GENX(MI_PREDICATE), mip) {
941       mip.LoadOperation    = LOAD_LOAD;
942       mip.CombineOperation = COMBINE_SET;
943       mip.CompareOperation = COMPARE_FALSE;
944    }
945 
946    mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));
947    mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));
948 
949    submit_batch();
950 
951    EXPECT_EQ(*(uint64_t *)(output + 0), u64);
952    EXPECT_EQ(*(uint32_t *)(output + 8), u32);
953    EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
954 }
955 
956 #endif /* GFX_VERx10 >= 75 */
957 
958 #if GFX_VERx10 >= 125
959 
960 /*
961  * Indirect load/store tests.  Only available on XE_HP+
962  */
963 
964 TEST_F(mi_builder_test, load_mem64_offset)
965 {
966    uint64_t values[8] = {
967       0x0123456789abcdef,
968       0xdeadbeefac0ffee2,
969       (uint64_t)-1,
970       1,
971       0,
972       1049571,
973       (uint64_t)-240058,
974       20204184,
975    };
976    memcpy(input, values, sizeof(values));
977 
978    uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
979    memcpy(input + 64, offsets, sizeof(offsets));
980 
981    for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
982       mi_store(&b, out_mem64(i * 8),
983                mi_load_mem64_offset(&b, in_addr(0), in_mem32(i * 4 + 64)));
984    }
985 
986    submit_batch();
987 
988    for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
989       EXPECT_EQ(*(uint64_t *)(output + i * 8), values[offsets[i] / 8]);
990 }
991 
992 TEST_F(mi_builder_test, store_mem64_offset)
993 {
994    uint64_t values[8] = {
995       0x0123456789abcdef,
996       0xdeadbeefac0ffee2,
997       (uint64_t)-1,
998       1,
999       0,
1000       1049571,
1001       (uint64_t)-240058,
1002       20204184,
1003    };
1004    memcpy(input, values, sizeof(values));
1005 
1006    uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
1007    memcpy(input + 64, offsets, sizeof(offsets));
1008 
1009    for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
1010       mi_store_mem64_offset(&b, out_addr(0), in_mem32(i * 4 + 64),
1011                                 in_mem64(i * 8));
1012    }
1013 
1014    submit_batch();
1015 
1016    for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
1017       EXPECT_EQ(*(uint64_t *)(output + offsets[i]), values[i]);
1018 }
1019 
1020 /*
1021  * Control-flow tests.  Only available on XE_HP+
1022  */
1023 
1024 TEST_F(mi_builder_test, goto)
1025 {
1026    const uint64_t value = 0xb453b411deadc0deull;
1027 
1028    mi_store(&b, out_mem64(0), mi_imm(value));
1029 
1030    struct mi_goto_target t = MI_GOTO_TARGET_INIT;
1031    mi_goto(&b, &t);
1032 
1033    /* This one should be skipped */
1034    mi_store(&b, out_mem64(0), mi_imm(0));
1035 
1036    mi_goto_target(&b, &t);
1037 
1038    submit_batch();
1039 
1040    EXPECT_EQ(*(uint64_t *)(output + 0), value);
1041 }
1042 
1043 #define MI_PREDICATE_RESULT  0x2418
1044 
1045 TEST_F(mi_builder_test, goto_if)
1046 {
1047    const uint64_t values[] = {
1048       0xb453b411deadc0deull,
1049       0x0123456789abcdefull,
1050       0,
1051    };
1052 
1053    mi_store(&b, out_mem64(0), mi_imm(values[0]));
1054 
1055    emit_cmd(GENX(MI_PREDICATE), mip) {
1056       mip.LoadOperation    = LOAD_LOAD;
1057       mip.CombineOperation = COMBINE_SET;
1058       mip.CompareOperation = COMPARE_FALSE;
1059    }
1060 
1061    struct mi_goto_target t = MI_GOTO_TARGET_INIT;
1062    mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);
1063 
1064    mi_store(&b, out_mem64(0), mi_imm(values[1]));
1065 
1066    emit_cmd(GENX(MI_PREDICATE), mip) {
1067       mip.LoadOperation    = LOAD_LOAD;
1068       mip.CombineOperation = COMBINE_SET;
1069       mip.CompareOperation = COMPARE_TRUE;
1070    }
1071 
1072    mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);
1073 
1074    /* This one should be skipped */
1075    mi_store(&b, out_mem64(0), mi_imm(values[2]));
1076 
1077    mi_goto_target(&b, &t);
1078 
1079    submit_batch();
1080 
1081    EXPECT_EQ(*(uint64_t *)(output + 0), values[1]);
1082 }
1083 
1084 TEST_F(mi_builder_test, loop_simple)
1085 {
1086    const uint64_t loop_count = 8;
1087 
1088    mi_store(&b, out_mem64(0), mi_imm(0));
1089 
1090    mi_loop(&b) {
1091       mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));
1092 
1093       mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
1094    }
1095 
1096    submit_batch();
1097 
1098    EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
1099 }
1100 
1101 TEST_F(mi_builder_test, loop_break)
1102 {
1103    mi_loop(&b) {
1104       mi_store(&b, out_mem64(0), mi_imm(1));
1105 
1106       mi_break_if(&b, mi_imm(0));
1107 
1108       mi_store(&b, out_mem64(0), mi_imm(2));
1109 
1110       mi_break(&b);
1111 
1112       mi_store(&b, out_mem64(0), mi_imm(3));
1113    }
1114 
1115    submit_batch();
1116 
1117    EXPECT_EQ(*(uint64_t *)(output + 0), 2);
1118 }
1119 
1120 TEST_F(mi_builder_test, loop_continue)
1121 {
1122    const uint64_t loop_count = 8;
1123 
1124    mi_store(&b, out_mem64(0), mi_imm(0));
1125    mi_store(&b, out_mem64(8), mi_imm(0));
1126 
1127    mi_loop(&b) {
1128       mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));
1129 
1130       mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
1131       mi_store(&b, out_mem64(8), mi_imm(5));
1132 
1133       mi_continue(&b);
1134 
1135       mi_store(&b, out_mem64(8), mi_imm(10));
1136    }
1137 
1138    submit_batch();
1139 
1140    EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
1141    EXPECT_EQ(*(uint64_t *)(output + 8), 5);
1142 }
1143 
1144 TEST_F(mi_builder_test, loop_continue_if)
1145 {
1146    const uint64_t loop_count = 8;
1147 
1148    mi_store(&b, out_mem64(0), mi_imm(0));
1149    mi_store(&b, out_mem64(8), mi_imm(0));
1150 
1151    mi_loop(&b) {
1152       mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));
1153 
1154       mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
1155       mi_store(&b, out_mem64(8), mi_imm(5));
1156 
1157       emit_cmd(GENX(MI_PREDICATE), mip) {
1158          mip.LoadOperation    = LOAD_LOAD;
1159          mip.CombineOperation = COMBINE_SET;
1160          mip.CompareOperation = COMPARE_FALSE;
1161       }
1162 
1163       mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));
1164 
1165       mi_store(&b, out_mem64(8), mi_imm(10));
1166 
1167       emit_cmd(GENX(MI_PREDICATE), mip) {
1168          mip.LoadOperation    = LOAD_LOAD;
1169          mip.CombineOperation = COMBINE_SET;
1170          mip.CompareOperation = COMPARE_TRUE;
1171       }
1172 
1173       mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));
1174 
1175       mi_store(&b, out_mem64(8), mi_imm(15));
1176    }
1177 
1178    submit_batch();
1179 
1180    EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
1181    EXPECT_EQ(*(uint64_t *)(output + 8), 10);
1182 }
1183 #endif /* GFX_VERx10 >= 125 */
1184