1 // REQUIRES: aarch64-registered-target
2 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s
4 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
5 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s
6 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null
7 #include <arm_sve.h>
8
9 #ifdef SVE_OVERLOADED_FORMS
10 // A simple used,unused... macro, long enough to represent any SVE builtin.
11 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
12 #else
13 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
14 #endif
15
test_svcmla_f16_z(svbool_t pg,svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)16 svfloat16_t test_svcmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
17 {
18 // CHECK-LABEL: test_svcmla_f16_z
19 // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
20 // CHECK-DAG: %[[SEL:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.sel.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> zeroinitializer)
21 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %[[SEL]], <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 0)
22 // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
23 return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 0);
24 }
25
test_svcmla_f16_z_1(svbool_t pg,svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)26 svfloat16_t test_svcmla_f16_z_1(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
27 {
28 // CHECK-LABEL: test_svcmla_f16_z_1
29 // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
30 // CHECK-DAG: %[[SEL:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.sel.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> zeroinitializer)
31 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %[[SEL]], <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 90)
32 // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
33 return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 90);
34 }
35
test_svcmla_f16_z_2(svbool_t pg,svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)36 svfloat16_t test_svcmla_f16_z_2(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
37 {
38 // CHECK-LABEL: test_svcmla_f16_z_2
39 // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
40 // CHECK-DAG: %[[SEL:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.sel.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> zeroinitializer)
41 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %[[SEL]], <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 180)
42 // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
43 return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 180);
44 }
45
test_svcmla_f16_z_3(svbool_t pg,svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)46 svfloat16_t test_svcmla_f16_z_3(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
47 {
48 // CHECK-LABEL: test_svcmla_f16_z_3
49 // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
50 // CHECK-DAG: %[[SEL:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.sel.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> zeroinitializer)
51 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %[[SEL]], <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 270)
52 // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
53 return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 270);
54 }
55
test_svcmla_f32_z(svbool_t pg,svfloat32_t op1,svfloat32_t op2,svfloat32_t op3)56 svfloat32_t test_svcmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
57 {
58 // CHECK-LABEL: test_svcmla_f32_z
59 // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
60 // CHECK-DAG: %[[SEL:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.sel.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> zeroinitializer)
61 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %[[SEL]], <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 0)
62 // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
63 return SVE_ACLE_FUNC(svcmla,_f32,_z,)(pg, op1, op2, op3, 0);
64 }
65
test_svcmla_f64_z(svbool_t pg,svfloat64_t op1,svfloat64_t op2,svfloat64_t op3)66 svfloat64_t test_svcmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
67 {
68 // CHECK-LABEL: test_svcmla_f64_z
69 // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
70 // CHECK-DAG: %[[SEL:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> zeroinitializer)
71 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %[[SEL]], <vscale x 2 x double> %op2, <vscale x 2 x double> %op3, i32 90)
72 // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
73 return SVE_ACLE_FUNC(svcmla,_f64,_z,)(pg, op1, op2, op3, 90);
74 }
75
test_svcmla_f16_m(svbool_t pg,svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)76 svfloat16_t test_svcmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
77 {
78 // CHECK-LABEL: test_svcmla_f16_m
79 // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
80 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 180)
81 // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
82 return SVE_ACLE_FUNC(svcmla,_f16,_m,)(pg, op1, op2, op3, 180);
83 }
84
test_svcmla_f32_m(svbool_t pg,svfloat32_t op1,svfloat32_t op2,svfloat32_t op3)85 svfloat32_t test_svcmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
86 {
87 // CHECK-LABEL: test_svcmla_f32_m
88 // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
89 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 270)
90 // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
91 return SVE_ACLE_FUNC(svcmla,_f32,_m,)(pg, op1, op2, op3, 270);
92 }
93
test_svcmla_f64_m(svbool_t pg,svfloat64_t op1,svfloat64_t op2,svfloat64_t op3)94 svfloat64_t test_svcmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
95 {
96 // CHECK-LABEL: test_svcmla_f64_m
97 // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
98 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %op3, i32 0)
99 // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
100 return SVE_ACLE_FUNC(svcmla,_f64,_m,)(pg, op1, op2, op3, 0);
101 }
102
test_svcmla_f16_x(svbool_t pg,svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)103 svfloat16_t test_svcmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
104 {
105 // CHECK-LABEL: test_svcmla_f16_x
106 // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
107 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 90)
108 // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
109 return SVE_ACLE_FUNC(svcmla,_f16,_x,)(pg, op1, op2, op3, 90);
110 }
111
test_svcmla_f32_x(svbool_t pg,svfloat32_t op1,svfloat32_t op2,svfloat32_t op3)112 svfloat32_t test_svcmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
113 {
114 // CHECK-LABEL: test_svcmla_f32_x
115 // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
116 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 180)
117 // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
118 return SVE_ACLE_FUNC(svcmla,_f32,_x,)(pg, op1, op2, op3, 180);
119 }
120
test_svcmla_f64_x(svbool_t pg,svfloat64_t op1,svfloat64_t op2,svfloat64_t op3)121 svfloat64_t test_svcmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
122 {
123 // CHECK-LABEL: test_svcmla_f64_x
124 // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
125 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %op3, i32 270)
126 // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
127 return SVE_ACLE_FUNC(svcmla,_f64,_x,)(pg, op1, op2, op3, 270);
128 }
129
test_svcmla_lane_f16(svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)130 svfloat16_t test_svcmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
131 {
132 // CHECK-LABEL: test_svcmla_lane_f16
133 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 0, i32 0)
134 // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
135 return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 0, 0);
136 }
137
test_svcmla_lane_f16_1(svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)138 svfloat16_t test_svcmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
139 {
140 // CHECK-LABEL: test_svcmla_lane_f16_1
141 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 3, i32 90)
142 // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
143 return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 3, 90);
144 }
145
test_svcmla_lane_f32(svfloat32_t op1,svfloat32_t op2,svfloat32_t op3)146 svfloat32_t test_svcmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
147 {
148 // CHECK-LABEL: test_svcmla_lane_f32
149 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 0, i32 180)
150 // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
151 return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 0, 180);
152 }
153
test_svcmla_lane_f32_1(svfloat32_t op1,svfloat32_t op2,svfloat32_t op3)154 svfloat32_t test_svcmla_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
155 {
156 // CHECK-LABEL: test_svcmla_lane_f32_1
157 // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 1, i32 270)
158 // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
159 return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 1, 270);
160 }
161