; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

;
; Unpredicated dup instruction (which is an alias for mov):
; * register + register,
; * register + immediate
;

define <vscale x 16 x i8> @dup_i8(i8 %b) {
; CHECK-LABEL: dup_i8:
; CHECK: mov z0.b, w0
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 16 x i8> @dup_imm_i8() {
; CHECK-LABEL: dup_imm_i8:
; CHECK: mov z0.b, #16
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 16)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @dup_i16(i16 %b) {
; CHECK-LABEL: dup_i16:
; CHECK: mov z0.h, w0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @dup_imm_i16(i16 %b) {
; CHECK-LABEL: dup_imm_i16:
; CHECK: mov z0.h, #16
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @dup_i32(i32 %b) {
; CHECK-LABEL: dup_i32:
; CHECK: mov z0.s, w0
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @dup_imm_i32(i32 %b) {
; CHECK-LABEL: dup_imm_i32:
; CHECK: mov z0.s, #16
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 16)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @dup_i64(i64 %b) {
; CHECK-LABEL: dup_i64:
; CHECK: mov z0.d, x0
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @dup_imm_i64(i64 %b) {
; CHECK-LABEL: dup_imm_i64:
; CHECK: mov z0.d, #16
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 16)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @dup_f16(half %b) {
; CHECK-LABEL: dup_f16:
; CHECK: mov z0.h, h0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) #0 {
; CHECK-LABEL: dup_bf16:
; CHECK: mov z0.h, h0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @dup_imm_f16(half %b) {
; CHECK-LABEL: dup_imm_f16:
; CHECK: mov z0.h, #16.00000000
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 16.)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @dup_f32(float %b) {
; CHECK-LABEL: dup_f32:
; CHECK: mov z0.s, s0
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @dup_imm_f32(float %b) {
; CHECK-LABEL: dup_imm_f32:
; CHECK: mov z0.s, #16.00000000
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 16.)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @dup_f64(double %b) {
; CHECK-LABEL: dup_f64:
; CHECK: mov z0.d, d0
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %b)
  ret <vscale x 2 x double> %out
}

define <vscale x 2 x double> @dup_imm_f64(double %b) {
; CHECK-LABEL: dup_imm_f64:
; CHECK: mov z0.d, #16.00000000
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 16.)
  ret <vscale x 2 x double> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }