1 // REQUIRES: aarch64-registered-target
2 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s
4 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
5 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s
6 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null
7 #include <arm_sve.h>
8 
9 #ifdef SVE_OVERLOADED_FORMS
10 // A simple used,unused... macro, long enough to represent any SVE builtin.
11 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
12 #else
13 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
14 #endif
15 
test_svcmla_f16_z(svbool_t pg,svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)16 svfloat16_t test_svcmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
17 {
18   // CHECK-LABEL: test_svcmla_f16_z
19   // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
20   // CHECK-DAG: %[[SEL:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.sel.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> zeroinitializer)
21   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %[[SEL]], <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 0)
22   // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
23   return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 0);
24 }
25 
test_svcmla_f16_z_1(svbool_t pg,svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)26 svfloat16_t test_svcmla_f16_z_1(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
27 {
28   // CHECK-LABEL: test_svcmla_f16_z_1
29   // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
30   // CHECK-DAG: %[[SEL:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.sel.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> zeroinitializer)
31   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %[[SEL]], <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 90)
32   // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
33   return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 90);
34 }
35 
test_svcmla_f16_z_2(svbool_t pg,svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)36 svfloat16_t test_svcmla_f16_z_2(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
37 {
38   // CHECK-LABEL: test_svcmla_f16_z_2
39   // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
40   // CHECK-DAG: %[[SEL:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.sel.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> zeroinitializer)
41   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %[[SEL]], <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 180)
42   // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
43   return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 180);
44 }
45 
test_svcmla_f16_z_3(svbool_t pg,svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)46 svfloat16_t test_svcmla_f16_z_3(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
47 {
48   // CHECK-LABEL: test_svcmla_f16_z_3
49   // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
50   // CHECK-DAG: %[[SEL:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.sel.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> zeroinitializer)
51   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %[[SEL]], <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 270)
52   // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
53   return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 270);
54 }
55 
test_svcmla_f32_z(svbool_t pg,svfloat32_t op1,svfloat32_t op2,svfloat32_t op3)56 svfloat32_t test_svcmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
57 {
58   // CHECK-LABEL: test_svcmla_f32_z
59   // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
60   // CHECK-DAG: %[[SEL:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.sel.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> zeroinitializer)
61   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %[[SEL]], <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 0)
62   // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
63   return SVE_ACLE_FUNC(svcmla,_f32,_z,)(pg, op1, op2, op3, 0);
64 }
65 
test_svcmla_f64_z(svbool_t pg,svfloat64_t op1,svfloat64_t op2,svfloat64_t op3)66 svfloat64_t test_svcmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
67 {
68   // CHECK-LABEL: test_svcmla_f64_z
69   // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
70   // CHECK-DAG: %[[SEL:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> zeroinitializer)
71   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %[[SEL]], <vscale x 2 x double> %op2, <vscale x 2 x double> %op3, i32 90)
72   // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
73   return SVE_ACLE_FUNC(svcmla,_f64,_z,)(pg, op1, op2, op3, 90);
74 }
75 
test_svcmla_f16_m(svbool_t pg,svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)76 svfloat16_t test_svcmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
77 {
78   // CHECK-LABEL: test_svcmla_f16_m
79   // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
80   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 180)
81   // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
82   return SVE_ACLE_FUNC(svcmla,_f16,_m,)(pg, op1, op2, op3, 180);
83 }
84 
test_svcmla_f32_m(svbool_t pg,svfloat32_t op1,svfloat32_t op2,svfloat32_t op3)85 svfloat32_t test_svcmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
86 {
87   // CHECK-LABEL: test_svcmla_f32_m
88   // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
89   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 270)
90   // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
91   return SVE_ACLE_FUNC(svcmla,_f32,_m,)(pg, op1, op2, op3, 270);
92 }
93 
test_svcmla_f64_m(svbool_t pg,svfloat64_t op1,svfloat64_t op2,svfloat64_t op3)94 svfloat64_t test_svcmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
95 {
96   // CHECK-LABEL: test_svcmla_f64_m
97   // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
98   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %op3, i32 0)
99   // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
100   return SVE_ACLE_FUNC(svcmla,_f64,_m,)(pg, op1, op2, op3, 0);
101 }
102 
test_svcmla_f16_x(svbool_t pg,svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)103 svfloat16_t test_svcmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
104 {
105   // CHECK-LABEL: test_svcmla_f16_x
106   // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
107   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 90)
108   // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
109   return SVE_ACLE_FUNC(svcmla,_f16,_x,)(pg, op1, op2, op3, 90);
110 }
111 
test_svcmla_f32_x(svbool_t pg,svfloat32_t op1,svfloat32_t op2,svfloat32_t op3)112 svfloat32_t test_svcmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
113 {
114   // CHECK-LABEL: test_svcmla_f32_x
115   // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
116   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 180)
117   // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
118   return SVE_ACLE_FUNC(svcmla,_f32,_x,)(pg, op1, op2, op3, 180);
119 }
120 
test_svcmla_f64_x(svbool_t pg,svfloat64_t op1,svfloat64_t op2,svfloat64_t op3)121 svfloat64_t test_svcmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
122 {
123   // CHECK-LABEL: test_svcmla_f64_x
124   // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
125   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %op3, i32 270)
126   // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
127   return SVE_ACLE_FUNC(svcmla,_f64,_x,)(pg, op1, op2, op3, 270);
128 }
129 
test_svcmla_lane_f16(svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)130 svfloat16_t test_svcmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
131 {
132   // CHECK-LABEL: test_svcmla_lane_f16
133   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 0, i32 0)
134   // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
135   return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 0, 0);
136 }
137 
test_svcmla_lane_f16_1(svfloat16_t op1,svfloat16_t op2,svfloat16_t op3)138 svfloat16_t test_svcmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
139 {
140   // CHECK-LABEL: test_svcmla_lane_f16_1
141   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 3, i32 90)
142   // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
143   return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 3, 90);
144 }
145 
test_svcmla_lane_f32(svfloat32_t op1,svfloat32_t op2,svfloat32_t op3)146 svfloat32_t test_svcmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
147 {
148   // CHECK-LABEL: test_svcmla_lane_f32
149   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 0, i32 180)
150   // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
151   return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 0, 180);
152 }
153 
test_svcmla_lane_f32_1(svfloat32_t op1,svfloat32_t op2,svfloat32_t op3)154 svfloat32_t test_svcmla_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
155 {
156   // CHECK-LABEL: test_svcmla_lane_f32_1
157   // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 1, i32 270)
158   // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
159   return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 1, 270);
160 }
161