; RUN: llc < %s -mtriple=arm-eabi -mcpu=krait | FileCheck %s

define void @func1(i16* %a, i16* %b, i16* %c) {
entry:
; The test case trying to vectorize the pseudo code below.
; a[i] = b[i] + c[i];
; b[i] = a[i] * c[i];
; a[i] = b[i] + a[i] * c[i];
;
; Checking that vector load a[i] for "a[i] = b[i] + a[i] * c[i]" is
; scheduled before the first vector store to "a[i] = b[i] + c[i]".
; Checking that there is no vector load a[i] scheduled between the vector
; stores to a[i], otherwise the load of a[i] will be polluted by the first
; vector store to a[i].
;
; This test case check that the chain information is updated during
; lowerMUL for the new created Load SDNode.

; CHECK: vldr {{.*}} [r0, #16]
; CHECK: vstr {{.*}} [r0, #16]
; CHECK-NOT: vldr {{.*}} [r0, #16]
; CHECK: vstr {{.*}} [r0, #16]

  %scevgep0 = getelementptr i16, i16* %a, i32 8
  %vector_ptr0 = bitcast i16* %scevgep0 to <4 x i16>*
  %vec0 = load <4 x i16>, <4 x i16>* %vector_ptr0, align 8
  %scevgep1 = getelementptr i16, i16* %b, i32 8
  %vector_ptr1 = bitcast i16* %scevgep1 to <4 x i16>*
  %vec1 = load <4 x i16>, <4 x i16>* %vector_ptr1, align 8
  %0 = zext <4 x i16> %vec1 to <4 x i32>
  %scevgep2 = getelementptr i16, i16* %c, i32 8
  %vector_ptr2 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec2 = load <4 x i16>, <4 x i16>* %vector_ptr2, align 8
  %1 = sext <4 x i16> %vec2 to <4 x i32>
  %vec3 = add <4 x i32> %1, %0
  %2 = trunc <4 x i32> %vec3 to <4 x i16>
  %scevgep3 = getelementptr i16, i16* %a, i32 8
  %vector_ptr3 = bitcast i16* %scevgep3 to <4 x i16>*
  store <4 x i16> %2, <4 x i16>* %vector_ptr3, align 8
  %vector_ptr4 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec4 = load <4 x i16>, <4 x i16>* %vector_ptr4, align 8
  %3 = sext <4 x i16> %vec4 to <4 x i32>
  %vec5 = mul <4 x i32> %3, %vec3
  %4 = trunc <4 x i32> %vec5 to <4 x i16>
  %vector_ptr5 = bitcast i16* %scevgep1 to <4 x i16>*
  store <4 x i16> %4, <4 x i16>* %vector_ptr5, align 8
  %5 = sext <4 x i16> %vec0 to <4 x i32>
  %vector_ptr6 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec6 = load <4 x i16>, <4 x i16>* %vector_ptr6, align 8
  %6 = sext <4 x i16> %vec6 to <4 x i32>
  %vec7 = mul <4 x i32> %6, %5
  %vec8 = add <4 x i32> %vec7, %vec5
  %7 = trunc <4 x i32> %vec8 to <4 x i16>
  %vector_ptr7 = bitcast i16* %scevgep3 to <4 x i16>*
  store <4 x i16> %7, <4 x i16>* %vector_ptr7, align 8
  ret void
}

define void @func2(i16* %a, i16* %b, i16* %c) {
entry:
; The test case trying to vectorize the pseudo code below.
; a[i] = b[i] + c[i];
; b[i] = a[i] * c[i];
; a[i] = b[i] + a[i] * c[i] + a[i];
;
; Checking that vector load a[i] for "a[i] = b[i] + a[i] * c[i] + a[i]"
; is scheduled before the first vector store to "a[i] = b[i] + c[i]".
; Checking that there is no vector load a[i] scheduled between the first
; vector store to a[i] and the vector add of a[i], otherwise the load of
; a[i] will be polluted by the first vector store to a[i].
;
; This test case check that both the chain and value of the new created
; Load SDNode are updated during lowerMUL.

; CHECK: vldr {{.*}} [r0, #16]
; CHECK: vstr {{.*}} [r0, #16]
; CHECK-NOT: vldr {{.*}} [r0, #16]
; CHECK: vaddw.s16
; CHECK: vstr {{.*}} [r0, #16]

  %scevgep0 = getelementptr i16, i16* %a, i32 8
  %vector_ptr0 = bitcast i16* %scevgep0 to <4 x i16>*
  %vec0 = load <4 x i16>, <4 x i16>* %vector_ptr0, align 8
  %scevgep1 = getelementptr i16, i16* %b, i32 8
  %vector_ptr1 = bitcast i16* %scevgep1 to <4 x i16>*
  %vec1 = load <4 x i16>, <4 x i16>* %vector_ptr1, align 8
  %0 = zext <4 x i16> %vec1 to <4 x i32>
  %scevgep2 = getelementptr i16, i16* %c, i32 8
  %vector_ptr2 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec2 = load <4 x i16>, <4 x i16>* %vector_ptr2, align 8
  %1 = sext <4 x i16> %vec2 to <4 x i32>
  %vec3 = add <4 x i32> %1, %0
  %2 = trunc <4 x i32> %vec3 to <4 x i16>
  %scevgep3 = getelementptr i16, i16* %a, i32 8
  %vector_ptr3 = bitcast i16* %scevgep3 to <4 x i16>*
  store <4 x i16> %2, <4 x i16>* %vector_ptr3, align 8
  %vector_ptr4 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec4 = load <4 x i16>, <4 x i16>* %vector_ptr4, align 8
  %3 = sext <4 x i16> %vec4 to <4 x i32>
  %vec5 = mul <4 x i32> %3, %vec3
  %4 = trunc <4 x i32> %vec5 to <4 x i16>
  %vector_ptr5 = bitcast i16* %scevgep1 to <4 x i16>*
  store <4 x i16> %4, <4 x i16>* %vector_ptr5, align 8
  %5 = sext <4 x i16> %vec0 to <4 x i32>
  %vector_ptr6 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec6 = load <4 x i16>, <4 x i16>* %vector_ptr6, align 8
  %6 = sext <4 x i16> %vec6 to <4 x i32>
  %vec7 = mul <4 x i32> %6, %5
  %vec8 = add <4 x i32> %vec7, %vec5
  %vec9 = add <4 x i32> %vec8, %5
  %7 = trunc <4 x i32> %vec9 to <4 x i16>
  %vector_ptr7 = bitcast i16* %scevgep3 to <4 x i16>*
  store <4 x i16> %7, <4 x i16>* %vector_ptr7, align 8
  ret void
}