1 // RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown  -target-feature +avx512f  -target-feature +amx-int8  \
2 // RUN: -target-feature +amx-bf16 -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
3 
4 #include <immintrin.h>
5 
// Memory region handed to the tile load/store intrinsics in the tests below.
char buf[1024];
// Stride argument passed to every tile load/store call in this file.
#define STRIDE 32

// Second memory region; test_api loads from it on its else-branch.
char buf2[1024];
10 
11 // This is an example code and integration test.
test_api(int cond,short row,short col)12 void test_api(int cond, short row, short col) {
13   //CHECK-LABEL: @test_api
14   //CHECK: call x86_amx @llvm.x86.tileloadd64.internal
15   //CHECK: call x86_amx @llvm.x86.tdpbssd.internal
16   //CHECK: call void @llvm.x86.tilestored64.internal
17   __tile1024i a = {row, 8};
18   __tile1024i b = {8, col};
19   __tile1024i c = {row, col};
20 
21   if (cond) {
22     __tile_loadd(&a, buf, STRIDE);
23     __tile_loadd(&b, buf, STRIDE);
24     __tile_loadd(&c, buf, STRIDE);
25   } else {
26     __tile_loadd(&a, buf2, STRIDE);
27     __tile_loadd(&b, buf2, STRIDE);
28     __tile_loadd(&c, buf2, STRIDE);
29   }
30   __tile_dpbssd(&c, a, b);
31   __tile_stored(buf, STRIDE, c);
32 }
33 
test_tile_loadd(short row,short col)34 void test_tile_loadd(short row, short col) {
35   //CHECK-LABEL: @test_tile_loadd
36   //CHECK: call x86_amx @llvm.x86.tileloadd64.internal
37   //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
38   __tile1024i a = {row, col};
39   __tile_loadd(&a, buf, STRIDE);
40 }
41 
test_tile_stream_loadd(short row,short col)42 void test_tile_stream_loadd(short row, short col) {
43   //CHECK-LABEL: @test_tile_stream_loadd
44   //CHECK: call x86_amx @llvm.x86.tileloaddt164.internal
45   //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
46   __tile1024i a = {row, col};
47   __tile_stream_loadd(&a, buf, STRIDE);
48 }
49 
// Verifies the IR emitted for __tile_dpbssd(&c, a, b): a call to the internal
// tdpbssd intrinsic, immediately followed by a bitcast of the x86_amx result
// to <256 x i32>.
void test_tile_dpbssd(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbssd
  //CHECK: call x86_amx @llvm.x86.tdpbssd.internal
  //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
  __tile_dpbssd(&c, a, b);
}
56 
// Verifies the IR emitted for __tile_dpbsud(&c, a, b): a call to the internal
// tdpbsud intrinsic, immediately followed by a bitcast of the x86_amx result
// to <256 x i32>.
void test_tile_dpbsud(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbsud
  //CHECK: call x86_amx @llvm.x86.tdpbsud.internal
  //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
  __tile_dpbsud(&c, a, b);
}
63 
// Verifies the IR emitted for __tile_dpbusd(&c, a, b): a call to the internal
// tdpbusd intrinsic, immediately followed by a bitcast of the x86_amx result
// to <256 x i32>.
void test_tile_dpbusd(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbusd
  //CHECK: call x86_amx @llvm.x86.tdpbusd.internal
  //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
  __tile_dpbusd(&c, a, b);
}
70 
// Verifies the IR emitted for __tile_dpbuud(&c, a, b): a call to the internal
// tdpbuud intrinsic, immediately followed by a bitcast of the x86_amx result
// to <256 x i32>.
void test_tile_dpbuud(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbuud
  //CHECK: call x86_amx @llvm.x86.tdpbuud.internal
  //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
  __tile_dpbuud(&c, a, b);
}
77 
// Verifies the IR emitted for __tile_stored: the <256 x i32> tile value is
// bitcast to x86_amx and the very next instruction is the call to the
// internal tilestored64 intrinsic.
void test_tile_stored(__tile1024i c) {
  //CHECK-LABEL: @test_tile_stored
  //CHECK: {{%.*}} = bitcast <256 x i32> {{%.*}} to x86_amx
  //CHECK-NEXT: call void @llvm.x86.tilestored64.internal
  __tile_stored(buf, STRIDE, c);
}
84 
// Verifies the IR emitted for __tile_zero: a call to the internal tilezero
// intrinsic, immediately followed by a bitcast of the x86_amx result to
// <256 x i32>. The second directive uses the same pattern as the other
// tests in this file so the bitcast is actually matched by FileCheck.
void test_tile_zero(__tile1024i c) {
  //CHECK-LABEL: @test_tile_zero
  //CHECK: call x86_amx @llvm.x86.tilezero.internal
  //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
  __tile_zero(&c);
}
91 
// Verifies the IR emitted for __tile_dpbf16ps: a call to the internal
// tdpbf16ps intrinsic, immediately followed by a bitcast of the x86_amx
// result to <256 x i32>.
// NOTE(review): unlike the other dot-product tests in this file, the
// destination here is &a with b and c as the remaining operands -- confirm
// this ordering is intentional.
void test_tile_dpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbf16ps
  //CHECK: call x86_amx @llvm.x86.tdpbf16ps.internal
  //CHECK-NEXT: {{%.*}} = bitcast x86_amx {{%.*}} to <256 x i32>
  __tile_dpbf16ps(&a, b, c);
}
98