; RUN: llc -mtriple=arm64_32-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s

define <2 x double> @test_insert_elt(<2 x double> %vec, double %val) {
; CHECK-LABEL: test_insert_elt:
; CHECK: mov.d v0[0], v1[0]
  %res = insertelement <2 x double> %vec, double %val, i32 0
  ret <2 x double> %res
}

define void @test_split_16B(<4 x float> %val, <4 x float>* %addr) {
; CHECK-LABEL: test_split_16B:
; CHECK: str q0, [x0]
  store <4 x float> %val, <4 x float>* %addr, align 8
  ret void
}

define void @test_split_16B_splat(<4 x i32>, <4 x i32>* %addr) {
; CHECK-LABEL: test_split_16B_splat:
; CHECK: str {{q[0-9]+}}

  %vec.tmp0 = insertelement <4 x i32> undef, i32 42, i32 0
  %vec.tmp1 = insertelement <4 x i32> %vec.tmp0, i32 42, i32 1
  %vec.tmp2 = insertelement <4 x i32> %vec.tmp1, i32 42, i32 2
  %vec = insertelement <4 x i32> %vec.tmp2, i32 42, i32 3

  store <4 x i32> %vec, <4 x i32>* %addr, align 8
  ret void
}


%vec = type <2 x double>

declare {%vec, %vec} @llvm.aarch64.neon.ld2r.v2f64.p0i8(i8*)
define {%vec, %vec} @test_neon_load(i8* %addr) {
; CHECK-LABEL: test_neon_load:
; CHECK: ld2r.2d { v0, v1 }, [x0]
  %res = call {%vec, %vec} @llvm.aarch64.neon.ld2r.v2f64.p0i8(i8* %addr)
  ret {%vec, %vec} %res
}

declare {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0i8(%vec, %vec, i64, i8*)
define {%vec, %vec} @test_neon_load_lane(i8* %addr, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_load_lane:
; CHECK: ld2.d { v0, v1 }[0], [x0]
  %res = call {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 0, i8* %addr)
  ret {%vec, %vec} %res
}

declare void @llvm.aarch64.neon.st2.v2f64.p0i8(%vec, %vec, i8*)
define void @test_neon_store(i8* %addr, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_store:
; CHECK: st2.2d { v0, v1 }, [x0]
  call void @llvm.aarch64.neon.st2.v2f64.p0i8(%vec %in1, %vec %in2, i8* %addr)
  ret void
}

declare void @llvm.aarch64.neon.st2lane.v2f64.p0i8(%vec, %vec, i64, i8*)
define void @test_neon_store_lane(i8* %addr, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_store_lane:
; CHECK: st2.d { v0, v1 }[1], [x0]
  call void @llvm.aarch64.neon.st2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 1, i8* %addr)
  ret void
}

declare {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0i8(i8*)
define {{%vec, %vec}, i8*} @test_neon_load_post(i8* %addr, i32 %offset) {
; CHECK-LABEL: test_neon_load_post:
; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1
; CHECK: ld2.2d { v0, v1 }, [x0], [[OFFSET]]

  %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0i8(i8* %addr)

  %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset

  %res.tmp = insertvalue {{%vec, %vec}, i8*} undef, {%vec, %vec} %vecs, 0
  %res = insertvalue {{%vec, %vec}, i8*} %res.tmp, i8* %addr.new, 1
  ret {{%vec, %vec}, i8*} %res
}

define {{%vec, %vec}, i8*} @test_neon_load_post_lane(i8* %addr, i32 %offset, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_load_post_lane:
; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1
; CHECK: ld2.d { v0, v1 }[1], [x0], [[OFFSET]]

  %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 1, i8* %addr)

  %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset

  %res.tmp = insertvalue {{%vec, %vec}, i8*} undef, {%vec, %vec} %vecs, 0
  %res = insertvalue {{%vec, %vec}, i8*} %res.tmp, i8* %addr.new, 1
  ret {{%vec, %vec}, i8*} %res
}

define i8* @test_neon_store_post(i8* %addr, i32 %offset, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_store_post:
; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1
; CHECK: st2.2d { v0, v1 }, [x0], [[OFFSET]]

  call void @llvm.aarch64.neon.st2.v2f64.p0i8(%vec %in1, %vec %in2, i8* %addr)

  %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset

  ret i8* %addr.new
}

define i8* @test_neon_store_post_lane(i8* %addr, i32 %offset, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_store_post_lane:
; CHECK: sxtw [[OFFSET:x[0-9]+]], w1
; CHECK: st2.d { v0, v1 }[0], [x0], [[OFFSET]]

  call void @llvm.aarch64.neon.st2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 0, i8* %addr)

  %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset

  ret i8* %addr.new
}

; ld1 is slightly different because it goes via ISelLowering of normal IR ops
; rather than an intrinsic.
define {%vec, double*} @test_neon_ld1_post_lane(double* %addr, i32 %offset, %vec %in) {
; CHECK-LABEL: test_neon_ld1_post_lane:
; CHECK: sbfiz [[OFFSET:x[0-9]+]], x1, #3, #32
; CHECK: ld1.d { v0 }[0], [x0], [[OFFSET]]

  %loaded = load double, double* %addr, align 8
  %newvec = insertelement %vec %in, double %loaded, i32 0

  %addr.new = getelementptr inbounds double, double* %addr, i32 %offset

  %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0
  %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1

  ret {%vec, double*} %res
}

define {{%vec, %vec}, i8*} @test_neon_load_post_exact(i8* %addr) {
; CHECK-LABEL: test_neon_load_post_exact:
; CHECK: ld2.2d { v0, v1 }, [x0], #32

  %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0i8(i8* %addr)

  %addr.new = getelementptr inbounds i8, i8* %addr, i32 32

  %res.tmp = insertvalue {{%vec, %vec}, i8*} undef, {%vec, %vec} %vecs, 0
  %res = insertvalue {{%vec, %vec}, i8*} %res.tmp, i8* %addr.new, 1
  ret {{%vec, %vec}, i8*} %res
}

define {%vec, double*} @test_neon_ld1_post_lane_exact(double* %addr, %vec %in) {
; CHECK-LABEL: test_neon_ld1_post_lane_exact:
; CHECK: ld1.d { v0 }[0], [x0], #8

  %loaded = load double, double* %addr, align 8
  %newvec = insertelement %vec %in, double %loaded, i32 0

  %addr.new = getelementptr inbounds double, double* %addr, i32 1

  %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0
  %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1

  ret {%vec, double*} %res
}

; As in the general load/store case, this GEP has defined semantics when the
; address wraps. We cannot use post-indexed addressing.
define {%vec, double*} @test_neon_ld1_notpost_lane_exact(double* %addr, %vec %in) {
; CHECK-LABEL: test_neon_ld1_notpost_lane_exact:
; CHECK-NOT: ld1.d { {{v[0-9]+}} }[0], [{{x[0-9]+|sp}}], #8
; CHECK: add w0, w0, #8
; CHECK: ret

  %loaded = load double, double* %addr, align 8
  %newvec = insertelement %vec %in, double %loaded, i32 0

  %addr.new = getelementptr double, double* %addr, i32 1

  %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0
  %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1

  ret {%vec, double*} %res
}

define {%vec, double*} @test_neon_ld1_notpost_lane(double* %addr, i32 %offset, %vec %in) {
; CHECK-LABEL: test_neon_ld1_notpost_lane:
; CHECK-NOT: ld1.d { {{v[0-9]+}} }[0], [{{x[0-9]+|sp}}], {{x[0-9]+|sp}}
; CHECK: add w0, w0, w1, lsl #3
; CHECK: ret

  %loaded = load double, double* %addr, align 8
  %newvec = insertelement %vec %in, double %loaded, i32 0

  %addr.new = getelementptr double, double* %addr, i32 %offset

  %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0
  %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1

  ret {%vec, double*} %res
}