; External intrinsics referenced by the helpers below.
declare void @llvm.trap() noreturn nounwind

declare <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32>)
declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>)
declare <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32>, <32 x i32>, i32)
declare <64 x i32> @llvm.hexagon.V6.vdealvdd.128B(<32 x i32>, <32 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vasrwhsat.128B(<32 x i32>, <32 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vsathub.128B(<32 x i32>, <32 x i32>)

; interleave.{vw,vh,vb}: take the hi and lo halves of the double vector and
; pass them (hi first) to vshuffvdd. The variants differ only in element type
; and in the control value (-4, -2, -1).
; NOTE(review): the control value presumably encodes the element size in
; bytes -- confirm against the HVX vshuffvdd documentation.
define weak_odr <64 x i32> @halide.hexagon.interleave.vw(<64 x i32> %arg) nounwind uwtable readnone alwaysinline {
  %hi_half = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %arg)
  %lo_half = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %arg)
  %shuffled = tail call <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32> %hi_half, <32 x i32> %lo_half, i32 -4)
  ret <64 x i32> %shuffled
}

define weak_odr <128 x i16> @halide.hexagon.interleave.vh(<128 x i16> %arg) nounwind uwtable readnone alwaysinline {
  ; The intrinsics are typed on i32 lanes, so reinterpret the bits first.
  %dv = bitcast <128 x i16> %arg to <64 x i32>
  %hi_half = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %dv)
  %lo_half = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %dv)
  %shuffled = tail call <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32> %hi_half, <32 x i32> %lo_half, i32 -2)
  %out = bitcast <64 x i32> %shuffled to <128 x i16>
  ret <128 x i16> %out
}

define weak_odr <256 x i8> @halide.hexagon.interleave.vb(<256 x i8> %arg) nounwind uwtable readnone alwaysinline {
  %dv = bitcast <256 x i8> %arg to <64 x i32>
  %hi_half = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %dv)
  %lo_half = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %dv)
  %shuffled = tail call <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32> %hi_half, <32 x i32> %lo_half, i32 -1)
  %out = bitcast <64 x i32> %shuffled to <256 x i8>
  ret <256 x i8> %out
}


declare <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32>, <32 x i32>)

; deinterleave.{vw,vh,vb}: same shape as the interleave helpers above, but
; built on vdealvdd instead of vshuffvdd, with the same control values.
define weak_odr <64 x i32> @halide.hexagon.deinterleave.vw(<64 x i32> %arg) nounwind uwtable readnone alwaysinline {
  %hi_half = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %arg)
  %lo_half = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %arg)
  %dealt = call <64 x i32> @llvm.hexagon.V6.vdealvdd.128B(<32 x i32> %hi_half, <32 x i32> %lo_half, i32 -4)
  ret <64 x i32> %dealt
}

define weak_odr <128 x i16> @halide.hexagon.deinterleave.vh(<128 x i16> %arg) nounwind uwtable readnone alwaysinline {
  %dv = bitcast <128 x i16> %arg to <64 x i32>
  %hi_half = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %dv)
  %lo_half = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %dv)
  %dealt = call <64 x i32> @llvm.hexagon.V6.vdealvdd.128B(<32 x i32> %hi_half, <32 x i32> %lo_half, i32 -2)
  %out = bitcast <64 x i32> %dealt to <128 x i16>
  ret <128 x i16> %out
}

define weak_odr <256 x i8> @halide.hexagon.deinterleave.vb(<256 x i8> %arg) nounwind uwtable readnone alwaysinline {
  %dv = bitcast <256 x i8> %arg to <64 x i32>
  %hi_half = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %dv)
  %lo_half = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %dv)
  %dealt = call <64 x i32> @llvm.hexagon.V6.vdealvdd.128B(<32 x i32> %hi_half, <32 x i32> %lo_half, i32 -1)
  %out = bitcast <64 x i32> %dealt to <256 x i8>
  ret <256 x i8> %out
}

declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32)
declare i32 @llvm.hexagon.S2.vsplatrb(i32)


; Returns an i16 holding the byte %arg in both its low and high 8 bits.
define weak_odr i16 @halide.hexagon.dup2.b(i8 %arg) nounwind uwtable readnone alwaysinline {
  %byte = zext i8 %arg to i16
  %byte_hi = shl i16 %byte, 8
  %pair = or i16 %byte, %byte_hi
  ret i16 %pair
}

; Returns an i32 holding the halfword %arg in both its low and high 16 bits.
define weak_odr i32 @halide.hexagon.dup2.h(i16 %arg) nounwind uwtable readnone alwaysinline {
  %half = zext i16 %arg to i32
  %half_hi = shl i32 %half, 16
  %pair = or i32 %half, %half_hi
  ret i32 %pair
}

; Zero-extends %arg to i32 and hands it to the scalar S2.vsplatrb intrinsic.
; NOTE(review): vsplatrb presumably replicates the low byte into all four
; bytes of the result -- confirm against the Hexagon reference.
define weak_odr i32 @halide.hexagon.dup4.b(i8 %arg) nounwind uwtable readnone alwaysinline {
  %byte = zext i8 %arg to i32
  %quad = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %byte)
  ret i32 %quad
}

; Builds the i16 (%high << 8) | %low and duplicates it into both halves of
; an i32 via halide.hexagon.dup2.h.
define weak_odr i32 @halide.hexagon.interleave.b.dup2.h(i8 %low, i8 %high) nounwind uwtable readnone alwaysinline {
  %lo16 = zext i8 %low to i16
  %hi16 = zext i8 %high to i16
  %hi16_shifted = shl i16 %hi16, 8
  %packed = or i16 %hi16_shifted, %lo16
  %dup = call i32 @halide.hexagon.dup2.h(i16 %packed)
  ret i32 %dup
}

; Replicates the byte into an i32 with dup4.b, passes that word to lvsplatw,
; and reinterprets the vector as <128 x i8>.
define weak_odr <128 x i8> @halide.hexagon.splat.b(i8 %arg) nounwind uwtable readnone alwaysinline {
  %word = call i32 @halide.hexagon.dup4.b(i8 %arg)
  %vec32 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 %word)
  %vec8 = bitcast <32 x i32> %vec32 to <128 x i8>
  ret <128 x i8> %vec8
}

; Replicates the halfword into an i32 with dup2.h, passes that word to
; lvsplatw, and reinterprets the vector as <64 x i16>.
define weak_odr <64 x i16> @halide.hexagon.splat.h(i16 %arg) nounwind uwtable readnone alwaysinline {
  %word = call i32 @halide.hexagon.dup2.h(i16 %arg)
  %vec32 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 %word)
  %vec16 = bitcast <32 x i32> %vec32 to <64 x i16>
  ret <64 x i16> %vec16
}

; Implement various 32 bit multiplications.
declare <32 x i32> @llvm.hexagon.V6.vaslw.128B(<32 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vaslw.acc.128B(<32 x i32>, <32 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vmpyieoh.128B(<32 x i32>, <32 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vmpyiowh.128B(<32 x i32>, <32 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vmpyiewuh.128B(<32 x i32>, <32 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vmpyiewuh.acc.128B(<32 x i32>, <32 x i32>, <32 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vshufeh.128B(<32 x i32>, <32 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vshufoh.128B(<32 x i32>, <32 x i32>)
declare <64 x i32> @llvm.hexagon.V6.vmpyuhv.128B(<32 x i32>, <32 x i32>)
declare <64 x i32> @llvm.hexagon.V6.vmpyuhv.acc.128B(<64 x i32>, <32 x i32>, <32 x i32>)

; Word x word multiply, built in two steps: vmpyieoh produces a partial
; result that vmpyiewuh.acc then accumulates onto.
define weak_odr <32 x i32> @halide.hexagon.mul.vw.vw(<32 x i32> %a, <32 x i32> %b) nounwind uwtable readnone alwaysinline {
  %partial = call <32 x i32> @llvm.hexagon.V6.vmpyieoh.128B(<32 x i32> %a, <32 x i32> %b)
  %product = call <32 x i32> @llvm.hexagon.V6.vmpyiewuh.acc.128B(<32 x i32> %partial, <32 x i32> %a, <32 x i32> %b)
  ret <32 x i32> %product
}

; Word x halfword multiply. The lo half of %a is multiplied (vmpyiowh) by %b
; shifted left 16 bits within each word; the hi half is multiplied by %b
; unshifted. The two results are rejoined with vcombine(hi, lo).
define weak_odr <64 x i32> @halide.hexagon.mul.vw.vh(<64 x i32> %a, <64 x i16> %b) nounwind uwtable readnone alwaysinline {
  %b_raw = bitcast <64 x i16> %b to <32 x i32>
  %b_shl16 = call <32 x i32> @llvm.hexagon.V6.vaslw.128B(<32 x i32> %b_raw, i32 16)
  %a_lower = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %a)
  %a_upper = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %a)
  %prod_lower = call <32 x i32> @llvm.hexagon.V6.vmpyiowh.128B(<32 x i32> %a_lower, <32 x i32> %b_shl16)
  %prod_upper = call <32 x i32> @llvm.hexagon.V6.vmpyiowh.128B(<32 x i32> %a_upper, <32 x i32> %b_raw)
  %prod = call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %prod_upper, <32 x i32> %prod_lower)
  ret <64 x i32> %prod
}

; Word x unsigned-halfword multiply. Mirror image of mul.vw.vh: the lo half
; of %a uses %b as-is, the hi half uses %b logically shifted right 16 bits
; within each word, and the multiply is vmpyiewuh.
define weak_odr <64 x i32> @halide.hexagon.mul.vw.vuh(<64 x i32> %a, <64 x i16> %b) nounwind uwtable readnone alwaysinline {
  %b_raw = bitcast <64 x i16> %b to <32 x i32>
  %b_shr16 = call <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32> %b_raw, i32 16)
  %a_lower = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %a)
  %a_upper = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %a)
  %prod_lower = call <32 x i32> @llvm.hexagon.V6.vmpyiewuh.128B(<32 x i32> %a_lower, <32 x i32> %b_raw)
  %prod_upper = call <32 x i32> @llvm.hexagon.V6.vmpyiewuh.128B(<32 x i32> %a_upper, <32 x i32> %b_shr16)
  %prod = call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %prod_upper, <32 x i32> %prod_lower)
  ret <64 x i32> %prod
}

; Do vaslw.acc on double vectors: apply the single-vector intrinsic to the
; lo halves and to the hi halves of %a / %l with the same shift amount %r,
; then rejoin the two results with vcombine(hi, lo).
define private <64 x i32> @vaslw.acc.dv.128B(<64 x i32> %a, <64 x i32> %l, i32 %r) nounwind uwtable readnone alwaysinline {
  %acc_lower = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %a)
  %acc_upper = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %a)
  %val_lower = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %l)
  %val_upper = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %l)
  %sum_lower = call <32 x i32> @llvm.hexagon.V6.vaslw.acc.128B(<32 x i32> %acc_lower, <32 x i32> %val_lower, i32 %r)
  %sum_upper = call <32 x i32> @llvm.hexagon.V6.vaslw.acc.128B(<32 x i32> %acc_upper, <32 x i32> %val_upper, i32 %r)
  %sum = call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %sum_upper, <32 x i32> %sum_lower)
  ret <64 x i32> %sum
}

; Unsigned word x unsigned halfword multiply. %a is rearranged with
; vshufeh / vshufoh, each arrangement is multiplied by %b with vmpyuhv, and
; the second product is shifted left 16 and accumulated onto the first.
; NOTE(review): vshufeh / vshufoh presumably gather the even / odd halfwords
; of each word -- confirm against the HVX reference.
define weak_odr <64 x i32> @halide.hexagon.mul.vuw.vuh(<64 x i32> %a, <64 x i16> %b) nounwind uwtable readnone alwaysinline {
  %b_words = bitcast <64 x i16> %b to <32 x i32>
  %a_lower = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %a)
  %a_upper = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %a)
  %a_even = call <32 x i32> @llvm.hexagon.V6.vshufeh.128B(<32 x i32> %a_upper, <32 x i32> %a_lower)
  %a_odd = call <32 x i32> @llvm.hexagon.V6.vshufoh.128B(<32 x i32> %a_upper, <32 x i32> %a_lower)
  %prod_even = call <64 x i32> @llvm.hexagon.V6.vmpyuhv.128B(<32 x i32> %a_even, <32 x i32> %b_words)
  %prod_odd = call <64 x i32> @llvm.hexagon.V6.vmpyuhv.128B(<32 x i32> %a_odd, <32 x i32> %b_words)
  %prod = call <64 x i32> @vaslw.acc.dv.128B(<64 x i32> %prod_even, <64 x i32> %prod_odd, i32 16)
  ret <64 x i32> %prod
}

; Unsigned word x unsigned word multiply. Both operands are rearranged with
; vshufeh / vshufoh. The result is
;   (a_e * b_e) + (((a_o * b_e) + (a_e * b_o)) << 16)
; where each product is a vmpyuhv and the final shift-accumulate is done by
; vaslw.acc.dv.128B. No a_o * b_o term is computed.
define weak_odr <64 x i32> @halide.hexagon.mul.vuw.vuw(<64 x i32> %a, <64 x i32> %b) nounwind uwtable readnone alwaysinline {
  %a_lower = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %a)
  %a_upper = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %a)
  %a_even = call <32 x i32> @llvm.hexagon.V6.vshufeh.128B(<32 x i32> %a_upper, <32 x i32> %a_lower)
  %a_odd = call <32 x i32> @llvm.hexagon.V6.vshufoh.128B(<32 x i32> %a_upper, <32 x i32> %a_lower)
  %b_lower = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %b)
  %b_upper = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %b)
  %b_even = call <32 x i32> @llvm.hexagon.V6.vshufeh.128B(<32 x i32> %b_upper, <32 x i32> %b_lower)
  %b_odd = call <32 x i32> @llvm.hexagon.V6.vshufoh.128B(<32 x i32> %b_upper, <32 x i32> %b_lower)
  %prod_ee = call <64 x i32> @llvm.hexagon.V6.vmpyuhv.128B(<32 x i32> %a_even, <32 x i32> %b_even)
  %prod_oe = call <64 x i32> @llvm.hexagon.V6.vmpyuhv.128B(<32 x i32> %a_odd, <32 x i32> %b_even)
  %prod_cross = call <64 x i32> @llvm.hexagon.V6.vmpyuhv.acc.128B(<64 x i32> %prod_oe, <32 x i32> %a_even, <32 x i32> %b_odd)
  %prod = call <64 x i32> @vaslw.acc.dv.128B(<64 x i32> %prod_ee, <64 x i32> %prod_cross, i32 16)
  ret <64 x i32> %prod
}

; 32 bit multiply keep high half.
declare <32 x i32> @llvm.hexagon.V6.vmpyewuh.128B(<32 x i32>, <32 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vmpyowh.sacc.128B(<32 x i32>, <32 x i32>, <32 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vmpyowh.rnd.sacc.128B(<32 x i32>, <32 x i32>, <32 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vasrw.128B(<32 x i32>, i32)

; All three helpers start with vmpyewuh(%a, %b) and accumulate onto it with
; a vmpyowh variant; they differ only in the accumulate intrinsic and in
; whether a final shift is applied.

; vmpyewuh, then vmpyowh.sacc, then an arithmetic shift right by 1 (vasrw).
define weak_odr <32 x i32> @halide.hexagon.trunc_mpy.vw.vw(<32 x i32> %a, <32 x i32> %b) nounwind uwtable readnone alwaysinline {
  %even_part = call <32 x i32> @llvm.hexagon.V6.vmpyewuh.128B(<32 x i32> %a, <32 x i32> %b)
  %doubled = call <32 x i32> @llvm.hexagon.V6.vmpyowh.sacc.128B(<32 x i32> %even_part, <32 x i32> %a, <32 x i32> %b)
  %halved = call <32 x i32> @llvm.hexagon.V6.vasrw.128B(<32 x i32> %doubled, i32 1)
  ret <32 x i32> %halved
}

; vmpyewuh, then vmpyowh.sacc; the accumulated value is returned unshifted.
define weak_odr <32 x i32> @halide.hexagon.trunc_satdw_mpy2.vw.vw(<32 x i32> %a, <32 x i32> %b) nounwind uwtable readnone alwaysinline {
  %even_part = call <32 x i32> @llvm.hexagon.V6.vmpyewuh.128B(<32 x i32> %a, <32 x i32> %b)
  %result = call <32 x i32> @llvm.hexagon.V6.vmpyowh.sacc.128B(<32 x i32> %even_part, <32 x i32> %a, <32 x i32> %b)
  ret <32 x i32> %result
}

; Same as trunc_satdw_mpy2.vw.vw, but using the .rnd accumulate intrinsic.
define weak_odr <32 x i32> @halide.hexagon.trunc_satdw_mpy2_rnd.vw.vw(<32 x i32> %a, <32 x i32> %b) nounwind uwtable readnone alwaysinline {
  %even_part = call <32 x i32> @llvm.hexagon.V6.vmpyewuh.128B(<32 x i32> %a, <32 x i32> %b)
  %result = call <32 x i32> @llvm.hexagon.V6.vmpyowh.rnd.sacc.128B(<32 x i32> %even_part, <32 x i32> %a, <32 x i32> %b)
  ret <32 x i32> %result
}

; Hexagon is missing shifts for byte sized operands.
declare <32 x i32> @llvm.hexagon.V6.vaslh.128B(<32 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vlsrh.128B(<32 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vaslhv.128B(<32 x i32>, <32 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vasrhv.128B(<32 x i32>, <32 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vlsrhv.128B(<32 x i32>, <32 x i32>)
declare <64 x i32> @llvm.hexagon.V6.vzb.128B(<32 x i32>)
declare <64 x i32> @llvm.hexagon.V6.vsb.128B(<32 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32>, <32 x i32>)

; Every byte shift below follows the same recipe: widen the bytes into a
; double vector (vzb for unsigned data, vsb for signed data), apply the
; halfword shift to the lo and hi halves separately, then narrow the two
; halves back into one vector with vshuffeb(hi, lo).
; NOTE(review): vzb / vsb presumably zero- / sign-extend bytes to halfwords,
; and vshuffeb presumably keeps the even bytes -- confirm against the HVX
; reference.

; Scalar shift amount: %b is sign-extended to i32 and used for both halves.
define weak_odr <128 x i8> @halide.hexagon.shl.vub.b(<128 x i8> %a, i8 %b) nounwind uwtable readnone alwaysinline {
  %amount = sext i8 %b to i32
  %bytes = bitcast <128 x i8> %a to <32 x i32>
  %wide = call <64 x i32> @llvm.hexagon.V6.vzb.128B(<32 x i32> %bytes)
  %wide_lo = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %wide)
  %wide_hi = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %wide)
  %shifted_lo = call <32 x i32> @llvm.hexagon.V6.vaslh.128B(<32 x i32> %wide_lo, i32 %amount)
  %shifted_hi = call <32 x i32> @llvm.hexagon.V6.vaslh.128B(<32 x i32> %wide_hi, i32 %amount)
  %packed = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %shifted_hi, <32 x i32> %shifted_lo)
  %result = bitcast <32 x i32> %packed to <128 x i8>
  ret <128 x i8> %result
}

define weak_odr <128 x i8> @halide.hexagon.shl.vb.b(<128 x i8> %a, i8 %b) nounwind uwtable readnone alwaysinline {
  ; Left shifts do not depend on signedness, so reuse the unsigned helper.
  %result = tail call <128 x i8> @halide.hexagon.shl.vub.b(<128 x i8> %a, i8 %b)
  ret <128 x i8> %result
}

; Unsigned right shift: vzb widening with a logical shift (vlsrh).
define weak_odr <128 x i8> @halide.hexagon.shr.vub.b(<128 x i8> %a, i8 %b) nounwind uwtable readnone alwaysinline {
  %amount = sext i8 %b to i32
  %bytes = bitcast <128 x i8> %a to <32 x i32>
  %wide = call <64 x i32> @llvm.hexagon.V6.vzb.128B(<32 x i32> %bytes)
  %wide_lo = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %wide)
  %wide_hi = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %wide)
  %shifted_lo = call <32 x i32> @llvm.hexagon.V6.vlsrh.128B(<32 x i32> %wide_lo, i32 %amount)
  %shifted_hi = call <32 x i32> @llvm.hexagon.V6.vlsrh.128B(<32 x i32> %wide_hi, i32 %amount)
  %packed = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %shifted_hi, <32 x i32> %shifted_lo)
  %result = bitcast <32 x i32> %packed to <128 x i8>
  ret <128 x i8> %result
}

; Signed right shift: vsb widening with an arithmetic shift (vasrh).
define weak_odr <128 x i8> @halide.hexagon.shr.vb.b(<128 x i8> %a, i8 %b) nounwind uwtable readnone alwaysinline {
  %amount = sext i8 %b to i32
  %bytes = bitcast <128 x i8> %a to <32 x i32>
  %wide = call <64 x i32> @llvm.hexagon.V6.vsb.128B(<32 x i32> %bytes)
  %wide_lo = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %wide)
  %wide_hi = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %wide)
  %shifted_lo = call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %wide_lo, i32 %amount)
  %shifted_hi = call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %wide_hi, i32 %amount)
  %packed = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %shifted_hi, <32 x i32> %shifted_lo)
  %result = bitcast <32 x i32> %packed to <128 x i8>
  ret <128 x i8> %result
}



; Per-lane shift amounts: %b is widened with vsb in every variant, and each
; half of the widened data is shifted by the matching half of the widened
; amounts.
define weak_odr <128 x i8> @halide.hexagon.shl.vub.vb(<128 x i8> %a, <128 x i8> %b) nounwind uwtable readnone alwaysinline {
  %val_bytes = bitcast <128 x i8> %a to <32 x i32>
  %amt_bytes = bitcast <128 x i8> %b to <32 x i32>
  %val_wide = call <64 x i32> @llvm.hexagon.V6.vzb.128B(<32 x i32> %val_bytes)
  %amt_wide = call <64 x i32> @llvm.hexagon.V6.vsb.128B(<32 x i32> %amt_bytes)
  %val_lo = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %val_wide)
  %val_hi = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %val_wide)
  %amt_lo = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %amt_wide)
  %amt_hi = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %amt_wide)
  %shifted_lo = call <32 x i32> @llvm.hexagon.V6.vaslhv.128B(<32 x i32> %val_lo, <32 x i32> %amt_lo)
  %shifted_hi = call <32 x i32> @llvm.hexagon.V6.vaslhv.128B(<32 x i32> %val_hi, <32 x i32> %amt_hi)
  %packed = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %shifted_hi, <32 x i32> %shifted_lo)
  %result = bitcast <32 x i32> %packed to <128 x i8>
  ret <128 x i8> %result
}

define weak_odr <128 x i8> @halide.hexagon.shl.vb.vb(<128 x i8> %a, <128 x i8> %b) nounwind uwtable readnone alwaysinline {
  ; Left shifts do not depend on signedness, so reuse the unsigned helper.
  %result = tail call <128 x i8> @halide.hexagon.shl.vub.vb(<128 x i8> %a, <128 x i8> %b)
  ret <128 x i8> %result
}

; Unsigned data (vzb), logical per-lane right shift (vlsrhv).
define weak_odr <128 x i8> @halide.hexagon.shr.vub.vb(<128 x i8> %a, <128 x i8> %b) nounwind uwtable readnone alwaysinline {
  %val_bytes = bitcast <128 x i8> %a to <32 x i32>
  %amt_bytes = bitcast <128 x i8> %b to <32 x i32>
  %val_wide = call <64 x i32> @llvm.hexagon.V6.vzb.128B(<32 x i32> %val_bytes)
  %amt_wide = call <64 x i32> @llvm.hexagon.V6.vsb.128B(<32 x i32> %amt_bytes)
  %val_lo = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %val_wide)
  %val_hi = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %val_wide)
  %amt_lo = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %amt_wide)
  %amt_hi = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %amt_wide)
  %shifted_lo = call <32 x i32> @llvm.hexagon.V6.vlsrhv.128B(<32 x i32> %val_lo, <32 x i32> %amt_lo)
  %shifted_hi = call <32 x i32> @llvm.hexagon.V6.vlsrhv.128B(<32 x i32> %val_hi, <32 x i32> %amt_hi)
  %packed = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %shifted_hi, <32 x i32> %shifted_lo)
  %result = bitcast <32 x i32> %packed to <128 x i8>
  ret <128 x i8> %result
}

; Signed data (vsb), arithmetic per-lane right shift (vasrhv).
define weak_odr <128 x i8> @halide.hexagon.shr.vb.vb(<128 x i8> %a, <128 x i8> %b) nounwind uwtable readnone alwaysinline {
  %val_bytes = bitcast <128 x i8> %a to <32 x i32>
  %amt_bytes = bitcast <128 x i8> %b to <32 x i32>
  %val_wide = call <64 x i32> @llvm.hexagon.V6.vsb.128B(<32 x i32> %val_bytes)
  %amt_wide = call <64 x i32> @llvm.hexagon.V6.vsb.128B(<32 x i32> %amt_bytes)
  %val_lo = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %val_wide)
  %val_hi = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %val_wide)
  %amt_lo = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %amt_wide)
  %amt_hi = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %amt_wide)
  %shifted_lo = call <32 x i32> @llvm.hexagon.V6.vasrhv.128B(<32 x i32> %val_lo, <32 x i32> %amt_lo)
  %shifted_hi = call <32 x i32> @llvm.hexagon.V6.vasrhv.128B(<32 x i32> %val_hi, <32 x i32> %amt_hi)
  %packed = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %shifted_hi, <32 x i32> %shifted_lo)
  %result = bitcast <32 x i32> %packed to <128 x i8>
  ret <128 x i8> %result
}

declare <64 x i32> @llvm.hexagon.V6.vmpabus.128B(<64 x i32>, i32)
declare <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32>, <64 x i32>, i32)

; add_2mpy / acc_add_2mpy: the two scalar byte coefficients are packed into
; one i32 by halide.hexagon.interleave.b.dup2.h, the two input vectors are
; joined into a double vector with vcombine(high, low), and both are handed
; to the vmpa* intrinsic. The acc_ variants also pass an accumulator as the
; first operand.
define weak_odr <128 x i16> @halide.hexagon.add_2mpy.vub.vub.b.b(<128 x i8> %low_v, <128 x i8> %high_v, i8 %low_c, i8 %high_c) nounwind uwtable readnone {
  %lo_words = bitcast <128 x i8> %low_v to <32 x i32>
  %hi_words = bitcast <128 x i8> %high_v to <32 x i32>
  %pair = call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %hi_words, <32 x i32> %lo_words)
  %coeffs = call i32 @halide.hexagon.interleave.b.dup2.h(i8 %low_c, i8 %high_c)
  %sum = call <64 x i32> @llvm.hexagon.V6.vmpabus.128B(<64 x i32> %pair, i32 %coeffs)
  %sum_h = bitcast <64 x i32> %sum to <128 x i16>
  ret <128 x i16> %sum_h
}

define weak_odr <128 x i16> @halide.hexagon.acc_add_2mpy.vh.vub.vub.b.b(<128 x i16> %acc, <128 x i8> %low_v, <128 x i8> %high_v, i8 %low_c, i8 %high_c) nounwind uwtable readnone {
  %acc_words = bitcast <128 x i16> %acc to <64 x i32>
  %lo_words = bitcast <128 x i8> %low_v to <32 x i32>
  %hi_words = bitcast <128 x i8> %high_v to <32 x i32>
  %pair = call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %hi_words, <32 x i32> %lo_words)
  %coeffs = call i32 @halide.hexagon.interleave.b.dup2.h(i8 %low_c, i8 %high_c)
  %sum = call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %acc_words, <64 x i32> %pair, i32 %coeffs)
  %sum_h = bitcast <64 x i32> %sum to <128 x i16>
  ret <128 x i16> %sum_h
}

declare <64 x i32> @llvm.hexagon.V6.vmpahb.128B(<64 x i32>, i32)
declare <64 x i32> @llvm.hexagon.V6.vmpahb.acc.128B(<64 x i32>, <64 x i32>, i32)

; Halfword-input variants: same construction, using vmpahb / vmpahb.acc and
; returning the <64 x i32> result directly.
define weak_odr <64 x i32> @halide.hexagon.add_2mpy.vh.vh.b.b(<64 x i16> %low_v, <64 x i16> %high_v, i8 %low_c, i8 %high_c) nounwind uwtable readnone {
  %lo_words = bitcast <64 x i16> %low_v to <32 x i32>
  %hi_words = bitcast <64 x i16> %high_v to <32 x i32>
  %pair = call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %hi_words, <32 x i32> %lo_words)
  %coeffs = call i32 @halide.hexagon.interleave.b.dup2.h(i8 %low_c, i8 %high_c)
  %sum = call <64 x i32> @llvm.hexagon.V6.vmpahb.128B(<64 x i32> %pair, i32 %coeffs)
  ret <64 x i32> %sum
}

define weak_odr <64 x i32> @halide.hexagon.acc_add_2mpy.vw.vh.vh.b.b(<64 x i32> %acc, <64 x i16> %low_v, <64 x i16> %high_v, i8 %low_c, i8 %high_c) nounwind uwtable readnone {
  %lo_words = bitcast <64 x i16> %low_v to <32 x i32>
  %hi_words = bitcast <64 x i16> %high_v to <32 x i32>
  %pair = call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %hi_words, <32 x i32> %lo_words)
  %coeffs = call i32 @halide.hexagon.interleave.b.dup2.h(i8 %low_c, i8 %high_c)
  %sum = call <64 x i32> @llvm.hexagon.V6.vmpahb.acc.128B(<64 x i32> %acc, <64 x i32> %pair, i32 %coeffs)
  ret <64 x i32> %sum
}

; Define a missing saturating narrow instruction in terms of a saturating narrowing shift.
declare <32 x i32> @llvm.hexagon.V6.vasrwuhsat.128B(<32 x i32>, <32 x i32>, i32)

; Passes the hi and lo halves of %arg to vasrwuhsat with a shift amount of 0
; and reinterprets the single-vector result as <64 x i16>.
define weak_odr <64 x i16> @halide.hexagon.trunc_satuh.vw(<64 x i32> %arg) nounwind uwtable readnone alwaysinline {
  %hi_half = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %arg)
  %lo_half = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %arg)
  %narrowed = call <32 x i32> @llvm.hexagon.V6.vasrwuhsat.128B(<32 x i32> %hi_half, <32 x i32> %lo_half, i32 0)
  %result = bitcast <32 x i32> %narrowed to <64 x i16>
  ret <64 x i16> %result
}

declare <64 x i32> @llvm.hexagon.V6.vtmpybus.128B(<64 x i32>, i32)
declare <64 x i32> @llvm.hexagon.V6.vtmpyb.128B(<64 x i32>, i32)
declare <64 x i32> @llvm.hexagon.V6.vtmpyhb.128B(<64 x i32>, i32)

; vtmpy wrappers: pack the two scalar byte coefficients into an i32 with
; halide.hexagon.interleave.b.dup2.h, join the two input vectors with
; vcombine(high, low), and call the matching vtmpy* intrinsic.
define weak_odr <128 x i16> @halide.hexagon.vtmpy.vub.vub.b.b(<128 x i8> %low_v, <128 x i8> %high_v, i8 %low_c, i8 %high_c) nounwind uwtable readnone {
  %lo_words = bitcast <128 x i8> %low_v to <32 x i32>
  %hi_words = bitcast <128 x i8> %high_v to <32 x i32>
  %pair = call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %hi_words, <32 x i32> %lo_words)
  %coeffs = call i32 @halide.hexagon.interleave.b.dup2.h(i8 %low_c, i8 %high_c)
  %prod = call <64 x i32> @llvm.hexagon.V6.vtmpybus.128B(<64 x i32> %pair, i32 %coeffs)
  %prod_h = bitcast <64 x i32> %prod to <128 x i16>
  ret <128 x i16> %prod_h
}

define weak_odr <128 x i16> @halide.hexagon.vtmpy.vb.vb.b.b(<128 x i8> %low_v, <128 x i8> %high_v, i8 %low_c, i8 %high_c) nounwind uwtable readnone {
  %lo_words = bitcast <128 x i8> %low_v to <32 x i32>
  %hi_words = bitcast <128 x i8> %high_v to <32 x i32>
  %pair = call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %hi_words, <32 x i32> %lo_words)
  %coeffs = call i32 @halide.hexagon.interleave.b.dup2.h(i8 %low_c, i8 %high_c)
  %prod = call <64 x i32> @llvm.hexagon.V6.vtmpyb.128B(<64 x i32> %pair, i32 %coeffs)
  %prod_h = bitcast <64 x i32> %prod to <128 x i16>
  ret <128 x i16> %prod_h
}

define weak_odr <64 x i32> @halide.hexagon.vtmpy.vh.vh.b.b(<64 x i16> %low_v, <64 x i16> %high_v, i8 %low_c, i8 %high_c) nounwind uwtable readnone {
  %lo_words = bitcast <64 x i16> %low_v to <32 x i32>
  %hi_words = bitcast <64 x i16> %high_v to <32 x i32>
  %pair = call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %hi_words, <32 x i32> %lo_words)
  %coeffs = call i32 @halide.hexagon.interleave.b.dup2.h(i8 %low_c, i8 %high_c)
  %prod = call <64 x i32> @llvm.hexagon.V6.vtmpyhb.128B(<64 x i32> %pair, i32 %coeffs)
  ret <64 x i32> %prod
}

declare void @llvm.hexagon.V6.vgathermh.128B(i8*, i32, i32, <32 x i32>)
declare void @llvm.hexagon.V6.vgathermw.128B(i8*, i32, i32, <32 x i32>)

; Gather wrappers. The destination pointer is %dst_base advanced by
; %dst_index elements (i16 for .h.h, i32 for .w.w); the source pointer is
; passed to the intrinsic as an i32 address together with %size and the
; index vector.
define weak_odr void @halide.hexagon.vgather.h.h(i8* %dst_base, i32 %dst_index, i8* %src_ptr, i32 %size, <64 x i16> %index) nounwind uwtable {
  %dst_typed = bitcast i8* %dst_base to i16*
  %dst_elem = getelementptr i16, i16* %dst_typed, i32 %dst_index
  %dst_raw = bitcast i16* %dst_elem to i8*
  %src_addr = ptrtoint i8* %src_ptr to i32
  %offsets = bitcast <64 x i16> %index to <32 x i32>
  call void @llvm.hexagon.V6.vgathermh.128B(i8* %dst_raw, i32 %src_addr, i32 %size, <32 x i32> %offsets)
  ret void
}

define weak_odr void @halide.hexagon.vgather.w.w(i8* %dst_base, i32 %dst_index, i8* %src_ptr, i32 %size, <32 x i32> %index) nounwind uwtable {
  %dst_typed = bitcast i8* %dst_base to i32*
  %dst_elem = getelementptr i32, i32* %dst_typed, i32 %dst_index
  %dst_raw = bitcast i32* %dst_elem to i8*
  %src_addr = ptrtoint i8* %src_ptr to i32
  call void @llvm.hexagon.V6.vgathermw.128B(i8* %dst_raw, i32 %src_addr, i32 %size, <32 x i32> %index)
  ret void
}

declare void @llvm.hexagon.V6.vscattermh.128B(i32, i32, <32 x i32>, <32 x i32>)
declare void @llvm.hexagon.V6.vscattermw.128B(i32, i32, <32 x i32>, <32 x i32>)

; Scatter wrappers. The buffer pointer is converted to an i32 address and
; passed to the intrinsic along with %size, the index vector and the value
; vector; halfword vectors are reinterpreted as <32 x i32> first.
define weak_odr void @halide.hexagon.vscatter.h.h(i8* %buf_ptr, i32 %size, <64 x i16> %idx, <64 x i16> %val) nounwind uwtable writeonly {
  %base_addr = ptrtoint i8* %buf_ptr to i32
  %offsets = bitcast <64 x i16> %idx to <32 x i32>
  %values = bitcast <64 x i16> %val to <32 x i32>
  call void @llvm.hexagon.V6.vscattermh.128B(i32 %base_addr, i32 %size, <32 x i32> %offsets, <32 x i32> %values) nounwind writeonly
  ret void
}

define weak_odr void @halide.hexagon.vscatter.w.w(i8* %buf_ptr, i32 %size, <32 x i32> %idx, <32 x i32> %val) nounwind uwtable writeonly {
  %base_addr = ptrtoint i8* %buf_ptr to i32
  call void @llvm.hexagon.V6.vscattermw.128B(i32 %base_addr, i32 %size, <32 x i32> %idx, <32 x i32> %val)
  ret void
}

declare void @llvm.hexagon.V6.vscattermh.add.128B(i32, i32, <32 x i32>, <32 x i32>)
declare void @llvm.hexagon.V6.vscattermw.add.128B(i32, i32, <32 x i32>, <32 x i32>)

; Scatter-accumulate wrappers: identical plumbing to the plain scatters, but
; calling the .add forms of the intrinsics.
define weak_odr void @halide.hexagon.vscatter_acc.h.h(i8* %buf_ptr, i32 %size, <64 x i16> %idx, <64 x i16> %val) nounwind uwtable writeonly {
  %base_addr = ptrtoint i8* %buf_ptr to i32
  %offsets = bitcast <64 x i16> %idx to <32 x i32>
  %values = bitcast <64 x i16> %val to <32 x i32>
  call void @llvm.hexagon.V6.vscattermh.add.128B(i32 %base_addr, i32 %size, <32 x i32> %offsets, <32 x i32> %values) nounwind writeonly
  ret void
}

define weak_odr void @halide.hexagon.vscatter_acc.w.w(i8* %buf_ptr, i32 %size, <32 x i32> %idx, <32 x i32> %val) nounwind uwtable writeonly {
  %base_addr = ptrtoint i8* %buf_ptr to i32
  call void @llvm.hexagon.V6.vscattermw.add.128B(i32 %base_addr, i32 %size, <32 x i32> %idx, <32 x i32> %val)
  ret void
}

; Emits a two-instruction inline-asm sequence on %ptr: a vmem store tagged
; :scatter_release followed by a vmem load into v1 (declared as clobbered).
; NOTE(review): presumably the load is what waits for outstanding scatters
; to complete -- confirm against the HVX scatter/gather documentation.
define weak_odr void @halide.hexagon.scatter.release(i8* %ptr) nounwind uwtable {
  call void asm sideeffect "vmem($0 + #0):scatter_release\0A; v1 = vmem($0 + #0)\0A", "=*m,*m,~{v1}"(i8* %ptr, i8* %ptr)
  ret void
}