1 // RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512f -target-feature +amx-int8 \
2 // RUN: -target-feature +amx-bf16 -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
3
4 #include <immintrin.h>
5
// Stride argument handed to every tile load/store call in this file.
#define STRIDE 32

// Memory operand for the tile loads in the taken branch of test_api and in the
// single-intrinsic tests; also the destination of every tile store.
char buf[1024];
// Second source buffer; only the else branch of test_api loads from it.
char buf2[1024];
10
11 // This is an example code and integration test.
test_api(int cond,short row,short col)12 void test_api(int cond, short row, short col) {
13 //CHECK-LABEL: @test_api
14 //CHECK: call x86_amx @llvm.x86.tileloadd64.internal
15 //CHECK: call x86_amx @llvm.x86.tdpbssd.internal
16 //CHECK: call void @llvm.x86.tilestored64.internal
17 __tile1024i a = {row, 8};
18 __tile1024i b = {8, col};
19 __tile1024i c = {row, col};
20
21 if (cond) {
22 __tile_loadd(&a, buf, STRIDE);
23 __tile_loadd(&b, buf, STRIDE);
24 __tile_loadd(&c, buf, STRIDE);
25 } else {
26 __tile_loadd(&a, buf2, STRIDE);
27 __tile_loadd(&b, buf2, STRIDE);
28 __tile_loadd(&c, buf2, STRIDE);
29 }
30 __tile_dpbssd(&c, a, b);
31 __tile_stored(buf, STRIDE, c);
32 }
33
test_tile_loadd(short row,short col)34 void test_tile_loadd(short row, short col) {
35 //CHECK-LABEL: @test_tile_loadd
36 //CHECK: call x86_amx @llvm.x86.tileloadd64.internal
37 //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
38 __tile1024i a = {row, col};
39 __tile_loadd(&a, buf, STRIDE);
40 }
41
test_tile_stream_loadd(short row,short col)42 void test_tile_stream_loadd(short row, short col) {
43 //CHECK-LABEL: @test_tile_stream_loadd
44 //CHECK: call x86_amx @llvm.x86.tileloaddt164.internal
45 //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
46 __tile1024i a = {row, col};
47 __tile_stream_loadd(&a, buf, STRIDE);
48 }
49
// Verifies the lowering of __tile_dpbssd with c as destination and a, b as
// sources: the emitted IR must contain a call to llvm.x86.tdpbssd.internal
// immediately followed by a bitcast of its x86_amx result to <256 x i32>.
void test_tile_dpbssd(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbssd
  //CHECK: call x86_amx @llvm.x86.tdpbssd.internal
  //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
  __tile_dpbssd(&c, a, b);
}
56
// Verifies the lowering of __tile_dpbsud with c as destination and a, b as
// sources: the emitted IR must contain a call to llvm.x86.tdpbsud.internal
// immediately followed by a bitcast of its x86_amx result to <256 x i32>.
void test_tile_dpbsud(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbsud
  //CHECK: call x86_amx @llvm.x86.tdpbsud.internal
  //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
  __tile_dpbsud(&c, a, b);
}
63
// Verifies the lowering of __tile_dpbusd with c as destination and a, b as
// sources: the emitted IR must contain a call to llvm.x86.tdpbusd.internal
// immediately followed by a bitcast of its x86_amx result to <256 x i32>.
void test_tile_dpbusd(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbusd
  //CHECK: call x86_amx @llvm.x86.tdpbusd.internal
  //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
  __tile_dpbusd(&c, a, b);
}
70
// Verifies the lowering of __tile_dpbuud with c as destination and a, b as
// sources: the emitted IR must contain a call to llvm.x86.tdpbuud.internal
// immediately followed by a bitcast of its x86_amx result to <256 x i32>.
void test_tile_dpbuud(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbuud
  //CHECK: call x86_amx @llvm.x86.tdpbuud.internal
  //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
  __tile_dpbuud(&c, a, b);
}
77
// Verifies the lowering of __tile_stored for a store of c to buf: the emitted
// IR must bitcast a <256 x i32> value to x86_amx and, on the very next
// instruction, call llvm.x86.tilestored64.internal.
void test_tile_stored(__tile1024i c) {
  //CHECK-LABEL: @test_tile_stored
  //CHECK: {{%.*}} = bitcast <256 x i32> {{%.*}} to x86_amx
  //CHECK-NEXT: call void @llvm.x86.tilestored64.internal
  __tile_stored(buf, STRIDE, c);
}
84
// Verifies the lowering of __tile_zero applied to c: the emitted IR must
// contain a call to llvm.x86.tilezero.internal immediately followed by a
// bitcast of its x86_amx result to <256 x i32>.
// Every directive needs its trailing colon; FileCheck silently skips a line
// that lacks it, which would leave the bitcast unverified.
void test_tile_zero(__tile1024i c) {
  //CHECK-LABEL: @test_tile_zero
  //CHECK: call x86_amx @llvm.x86.tilezero.internal
  //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
  __tile_zero(&c);
}
91
// Verifies the lowering of __tile_dpbf16ps: the emitted IR must contain a call
// to llvm.x86.tdpbf16ps.internal immediately followed by a bitcast of its
// x86_amx result to <256 x i32>.
// Note: unlike the __tile_dpb* tests in this file, the destination pointer
// here is &a and the two source operands are b and c.
void test_tile_dpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbf16ps
  //CHECK: call x86_amx @llvm.x86.tdpbf16ps.internal
  //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
  __tile_dpbf16ps(&a, b, c);
}
98