; RUN: opt -lower-matrix-intrinsics -S < %s | FileCheck %s
; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s

; Exercises how the lower-matrix-intrinsics pass carries the volatile flag and
; the alignment of a matrix load over to the per-column vector loads it emits.
; Both opt invocations above are verified against the same expectations.

; Column-major load of a 3x3 double matrix with a run-time stride and the i1
; argument of the intrinsic set to true. Each of the three columns is expected
; to be read by a volatile <3 x double> load with align 8, located at element
; offset (column index * %stride) from %in. No further loads may follow.
define <9 x double> @strided_load_3x3_volatile(double* %in, i64 %stride) {
; CHECK-LABEL: @strided_load_3x3_volatile(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* %in, i64 [[VEC_START]]
; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[VEC_GEP]] to <3 x double>*
; CHECK-NEXT:    load volatile <3 x double>, <3 x double>* [[VEC_CAST]], align 8
; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, double* %in, i64 [[VEC_START1]]
; CHECK-NEXT:    [[VEC_CAST3:%.*]] = bitcast double* [[VEC_GEP2]] to <3 x double>*
; CHECK-NEXT:    load volatile <3 x double>, <3 x double>* [[VEC_CAST3]], align 8
; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, double* %in, i64 [[VEC_START5]]
; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <3 x double>*
; CHECK-NEXT:    load volatile <3 x double>, <3 x double>* [[VEC_CAST7]], align 8
; CHECK-NOT:     = load
;
entry:
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64(double* %in, i64 %stride, i1 true, i32 3, i32 3)
  ret <9 x double> %load
}

declare <9 x double> @llvm.matrix.column.major.load.v9f64(double*, i64, i1, i32, i32)

; A plain volatile <4 x double> load whose result feeds a 2x2 * 2x2 matrix
; multiply. The load is expected to be split into two volatile <2 x double>
; column loads (element offsets 0 and 2), both with align 8, and nothing else
; may load afterwards.
define <4 x double> @load_volatile_multiply(<4 x double>* %in) {
; CHECK-LABEL: @load_volatile_multiply(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x double>* [[IN:%.*]] to double*
; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[TMP1]] to <2 x double>*
; CHECK-NEXT:    load volatile <2 x double>, <2 x double>* [[VEC_CAST]], align 8
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP1]], i64 2
; CHECK-NEXT:    [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>*
; CHECK-NEXT:    load volatile <2 x double>, <2 x double>* [[VEC_CAST1]], align 8
; CHECK-NOT:     = load
;
  %in.m = load volatile <4 x double>, <4 x double>* %in, align 8
  %res = call <4 x double> @llvm.matrix.multiply(<4 x double> %in.m, <4 x double> %in.m, i32 2, i32 2, i32 2)
  ret <4 x double> %res
}

declare <4 x double> @llvm.matrix.multiply(<4 x double>, <4 x double>, i32, i32, i32)


; Non-volatile 3x3 column-major load where the pointer argument is marked
; align 32 and the stride is a run-time value. Only the first column load is
; expected to keep align 32; the second and third are expected with align 8.
; NOTE(review): presumably the later columns fall back to the element
; alignment of double because the stride is unknown at compile time -- confirm
; against the pass implementation.
define <9 x double> @strided_load_3x3_align32(double* %in, i64 %stride) {
; CHECK-LABEL: @strided_load_3x3_align32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* %in, i64 [[VEC_START]]
; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[VEC_GEP]] to <3 x double>*
; CHECK-NEXT:    load <3 x double>, <3 x double>* [[VEC_CAST]], align 32
; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, double* %in, i64 [[VEC_START1]]
; CHECK-NEXT:    [[VEC_CAST3:%.*]] = bitcast double* [[VEC_GEP2]] to <3 x double>*
; CHECK-NEXT:    load <3 x double>, <3 x double>* [[VEC_CAST3]], align 8
; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, double* %in, i64 [[VEC_START5]]
; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <3 x double>*
; CHECK-NEXT:    load <3 x double>, <3 x double>* [[VEC_CAST7]], align 8
; CHECK-NOT:     = load
;
entry:
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64(double* align 32 %in, i64 %stride, i1 false, i32 3, i32 3)
  ret <9 x double> %load
}

; Same shape as the align 32 case, but the pointer argument is only marked
; align 2. All three column loads are expected to carry align 2.
define <9 x double> @strided_load_3x3_align2(double* %in, i64 %stride) {
; CHECK-LABEL: @strided_load_3x3_align2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* %in, i64 [[VEC_START]]
; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[VEC_GEP]] to <3 x double>*
; CHECK-NEXT:    load <3 x double>, <3 x double>* [[VEC_CAST]], align 2
; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, double* %in, i64 [[VEC_START1]]
; CHECK-NEXT:    [[VEC_CAST3:%.*]] = bitcast double* [[VEC_GEP2]] to <3 x double>*
; CHECK-NEXT:    load <3 x double>, <3 x double>* [[VEC_CAST3]], align 2
; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, double* %in, i64 [[VEC_START5]]
; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <3 x double>*
; CHECK-NEXT:    load <3 x double>, <3 x double>* [[VEC_CAST7]], align 2
; CHECK-NOT:     = load
;
entry:
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64(double* align 2 %in, i64 %stride, i1 false, i32 3, i32 3)
  ret <9 x double> %load
}


; A non-volatile <4 x double> load with align 2 feeding a 2x2 * 2x2 matrix
; multiply. Both resulting <2 x double> column loads are expected to keep
; align 2, with no additional loads.
define <4 x double> @load_align2_multiply(<4 x double>* %in) {
; CHECK-LABEL: @load_align2_multiply(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x double>* [[IN:%.*]] to double*
; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[TMP1]] to <2 x double>*
; CHECK-NEXT:    load <2 x double>, <2 x double>* [[VEC_CAST]], align 2
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP1]], i64 2
; CHECK-NEXT:    [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>*
; CHECK-NEXT:    load <2 x double>, <2 x double>* [[VEC_CAST1]], align 2
; CHECK-NOT:     = load
;
  %in.m = load <4 x double>, <4 x double>* %in, align 2
  %res = call <4 x double> @llvm.matrix.multiply(<4 x double> %in.m, <4 x double> %in.m, i32 2, i32 2, i32 2)
  ret <4 x double> %res
}

; 2x3 float matrix loaded with a constant stride of 2 from a pointer marked
; align 16. The columns are expected at element offsets 0, 2 and 4 of %in with
; alignments 16, 8 and 16 respectively.
; NOTE(review): these alignments are consistent with byte offsets 0, 8 and 16
; for 4-byte floats -- confirm that this is how the pass derives them.
; The three columns are then expected to be concatenated into the <6 x float>
; result through a chain of shufflevector instructions.
define <6 x float> @strided_load_2x3_align16_stride2(float* %in) {
; CHECK-LABEL: @strided_load_2x3_align16_stride2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast float* %in to <2 x float>*
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST]], align 16
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, float* %in, i64 2
; CHECK-NEXT:    [[VEC_CAST1:%.*]] = bitcast float* [[VEC_GEP]] to <2 x float>*
; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST1]], align 8
; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr float, float* %in, i64 4
; CHECK-NEXT:    [[VEC_CAST4:%.*]] = bitcast float* [[VEC_GEP3]] to <2 x float>*
; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST4]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[COL_LOAD5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:    ret <6 x float> [[TMP3]]
;
entry:
  %load = call <6 x float> @llvm.matrix.column.major.load.v6f32(float* align 16 %in, i64 2, i1 false, i32 2, i32 3)
  ret <6 x float> %load
}

declare <6 x float> @llvm.matrix.column.major.load.v6f32(float*, i64, i1, i32, i32)