1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE 3; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE 4 5define i8* @ldrwu32_4(i8* %x, i8* %y) { 6; CHECK-LABEL: ldrwu32_4: 7; CHECK: @ %bb.0: @ %entry 8; CHECK-NEXT: vldrw.u32 q0, [r0, #4] 9; CHECK-NEXT: vstrw.32 q0, [r1] 10; CHECK-NEXT: bx lr 11entry: 12 %z = getelementptr inbounds i8, i8* %x, i32 4 13 %0 = bitcast i8* %z to <4 x i32>* 14 %1 = load <4 x i32>, <4 x i32>* %0, align 4 15 %2 = bitcast i8* %y to <4 x i32>* 16 store <4 x i32> %1, <4 x i32>* %2, align 4 17 ret i8* %x 18} 19 20define i8* @ldrwu32_3(i8* %x, i8* %y) { 21; CHECK-LABEL: ldrwu32_3: 22; CHECK: @ %bb.0: @ %entry 23; CHECK-NEXT: adds r2, r0, #3 24; CHECK-NEXT: vldrw.u32 q0, [r2] 25; CHECK-NEXT: vstrw.32 q0, [r1] 26; CHECK-NEXT: bx lr 27entry: 28 %z = getelementptr inbounds i8, i8* %x, i32 3 29 %0 = bitcast i8* %z to <4 x i32>* 30 %1 = load <4 x i32>, <4 x i32>* %0, align 4 31 %2 = bitcast i8* %y to <4 x i32>* 32 store <4 x i32> %1, <4 x i32>* %2, align 4 33 ret i8* %x 34} 35 36define i8* @ldrwu32_m4(i8* %x, i8* %y) { 37; CHECK-LABEL: ldrwu32_m4: 38; CHECK: @ %bb.0: @ %entry 39; CHECK-NEXT: vldrw.u32 q0, [r0, #-4] 40; CHECK-NEXT: vstrw.32 q0, [r1] 41; CHECK-NEXT: bx lr 42entry: 43 %z = getelementptr inbounds i8, i8* %x, i32 -4 44 %0 = bitcast i8* %z to <4 x i32>* 45 %1 = load <4 x i32>, <4 x i32>* %0, align 4 46 %2 = bitcast i8* %y to <4 x i32>* 47 store <4 x i32> %1, <4 x i32>* %2, align 4 48 ret i8* %x 49} 50 51define i8* @ldrwu32_508(i8* %x, i8* %y) { 52; CHECK-LABEL: ldrwu32_508: 53; CHECK: @ %bb.0: @ %entry 54; CHECK-NEXT: vldrw.u32 q0, [r0, #508] 55; CHECK-NEXT: vstrw.32 q0, [r1] 56; CHECK-NEXT: bx lr 57entry: 58 %z = getelementptr inbounds i8, i8* %x, i32 508 
59 %0 = bitcast i8* %z to <4 x i32>* 60 %1 = load <4 x i32>, <4 x i32>* %0, align 4 61 %2 = bitcast i8* %y to <4 x i32>* 62 store <4 x i32> %1, <4 x i32>* %2, align 4 63 ret i8* %x 64} 65 66define i8* @ldrwu32_512(i8* %x, i8* %y) { 67; CHECK-LABEL: ldrwu32_512: 68; CHECK: @ %bb.0: @ %entry 69; CHECK-NEXT: add.w r2, r0, #512 70; CHECK-NEXT: vldrw.u32 q0, [r2] 71; CHECK-NEXT: vstrw.32 q0, [r1] 72; CHECK-NEXT: bx lr 73entry: 74 %z = getelementptr inbounds i8, i8* %x, i32 512 75 %0 = bitcast i8* %z to <4 x i32>* 76 %1 = load <4 x i32>, <4 x i32>* %0, align 4 77 %2 = bitcast i8* %y to <4 x i32>* 78 store <4 x i32> %1, <4 x i32>* %2, align 4 79 ret i8* %x 80} 81 82define i8* @ldrwu32_m508(i8* %x, i8* %y) { 83; CHECK-LABEL: ldrwu32_m508: 84; CHECK: @ %bb.0: @ %entry 85; CHECK-NEXT: vldrw.u32 q0, [r0, #-508] 86; CHECK-NEXT: vstrw.32 q0, [r1] 87; CHECK-NEXT: bx lr 88entry: 89 %z = getelementptr inbounds i8, i8* %x, i32 -508 90 %0 = bitcast i8* %z to <4 x i32>* 91 %1 = load <4 x i32>, <4 x i32>* %0, align 4 92 %2 = bitcast i8* %y to <4 x i32>* 93 store <4 x i32> %1, <4 x i32>* %2, align 4 94 ret i8* %x 95} 96 97define i8* @ldrwu32_m512(i8* %x, i8* %y) { 98; CHECK-LABEL: ldrwu32_m512: 99; CHECK: @ %bb.0: @ %entry 100; CHECK-NEXT: sub.w r2, r0, #512 101; CHECK-NEXT: vldrw.u32 q0, [r2] 102; CHECK-NEXT: vstrw.32 q0, [r1] 103; CHECK-NEXT: bx lr 104entry: 105 %z = getelementptr inbounds i8, i8* %x, i32 -512 106 %0 = bitcast i8* %z to <4 x i32>* 107 %1 = load <4 x i32>, <4 x i32>* %0, align 4 108 %2 = bitcast i8* %y to <4 x i32>* 109 store <4 x i32> %1, <4 x i32>* %2, align 4 110 ret i8* %x 111} 112 113 114define i8* @ldrhu32_4(i8* %x, i8* %y) { 115; CHECK-LABEL: ldrhu32_4: 116; CHECK: @ %bb.0: @ %entry 117; CHECK-NEXT: vldrh.u32 q0, [r0, #4] 118; CHECK-NEXT: vstrw.32 q0, [r1] 119; CHECK-NEXT: bx lr 120entry: 121 %z = getelementptr inbounds i8, i8* %x, i32 4 122 %0 = bitcast i8* %z to <4 x i16>* 123 %1 = load <4 x i16>, <4 x i16>* %0, align 2 124 %2 = zext <4 x i16> %1 to <4 x i32> 
125 %3 = bitcast i8* %y to <4 x i32>* 126 store <4 x i32> %2, <4 x i32>* %3, align 4 127 ret i8* %x 128} 129 130define i8* @ldrhu32_3(i8* %x, i8* %y) { 131; CHECK-LABEL: ldrhu32_3: 132; CHECK: @ %bb.0: @ %entry 133; CHECK-NEXT: adds r2, r0, #3 134; CHECK-NEXT: vldrh.u32 q0, [r2] 135; CHECK-NEXT: vstrw.32 q0, [r1] 136; CHECK-NEXT: bx lr 137entry: 138 %z = getelementptr inbounds i8, i8* %x, i32 3 139 %0 = bitcast i8* %z to <4 x i16>* 140 %1 = load <4 x i16>, <4 x i16>* %0, align 2 141 %2 = zext <4 x i16> %1 to <4 x i32> 142 %3 = bitcast i8* %y to <4 x i32>* 143 store <4 x i32> %2, <4 x i32>* %3, align 4 144 ret i8* %x 145} 146 147define i8* @ldrhu32_2(i8* %x, i8* %y) { 148; CHECK-LABEL: ldrhu32_2: 149; CHECK: @ %bb.0: @ %entry 150; CHECK-NEXT: vldrh.u32 q0, [r0, #2] 151; CHECK-NEXT: vstrw.32 q0, [r1] 152; CHECK-NEXT: bx lr 153entry: 154 %z = getelementptr inbounds i8, i8* %x, i32 2 155 %0 = bitcast i8* %z to <4 x i16>* 156 %1 = load <4 x i16>, <4 x i16>* %0, align 2 157 %2 = zext <4 x i16> %1 to <4 x i32> 158 %3 = bitcast i8* %y to <4 x i32>* 159 store <4 x i32> %2, <4 x i32>* %3, align 4 160 ret i8* %x 161} 162 163define i8* @ldrhu32_254(i8* %x, i8* %y) { 164; CHECK-LABEL: ldrhu32_254: 165; CHECK: @ %bb.0: @ %entry 166; CHECK-NEXT: vldrh.u32 q0, [r0, #254] 167; CHECK-NEXT: vstrw.32 q0, [r1] 168; CHECK-NEXT: bx lr 169entry: 170 %z = getelementptr inbounds i8, i8* %x, i32 254 171 %0 = bitcast i8* %z to <4 x i16>* 172 %1 = load <4 x i16>, <4 x i16>* %0, align 2 173 %2 = zext <4 x i16> %1 to <4 x i32> 174 %3 = bitcast i8* %y to <4 x i32>* 175 store <4 x i32> %2, <4 x i32>* %3, align 4 176 ret i8* %x 177} 178 179define i8* @ldrhu32_256(i8* %x, i8* %y) { 180; CHECK-LABEL: ldrhu32_256: 181; CHECK: @ %bb.0: @ %entry 182; CHECK-NEXT: add.w r2, r0, #256 183; CHECK-NEXT: vldrh.u32 q0, [r2] 184; CHECK-NEXT: vstrw.32 q0, [r1] 185; CHECK-NEXT: bx lr 186entry: 187 %z = getelementptr inbounds i8, i8* %x, i32 256 188 %0 = bitcast i8* %z to <4 x i16>* 189 %1 = load <4 x i16>, <4 x 
i16>* %0, align 2 190 %2 = zext <4 x i16> %1 to <4 x i32> 191 %3 = bitcast i8* %y to <4 x i32>* 192 store <4 x i32> %2, <4 x i32>* %3, align 4 193 ret i8* %x 194} 195 196define i8* @ldrhu32_m254(i8* %x, i8* %y) { 197; CHECK-LABEL: ldrhu32_m254: 198; CHECK: @ %bb.0: @ %entry 199; CHECK-NEXT: vldrh.u32 q0, [r0, #-254] 200; CHECK-NEXT: vstrw.32 q0, [r1] 201; CHECK-NEXT: bx lr 202entry: 203 %z = getelementptr inbounds i8, i8* %x, i32 -254 204 %0 = bitcast i8* %z to <4 x i16>* 205 %1 = load <4 x i16>, <4 x i16>* %0, align 2 206 %2 = zext <4 x i16> %1 to <4 x i32> 207 %3 = bitcast i8* %y to <4 x i32>* 208 store <4 x i32> %2, <4 x i32>* %3, align 4 209 ret i8* %x 210} 211 212define i8* @ldrhu32_m256(i8* %x, i8* %y) { 213; CHECK-LABEL: ldrhu32_m256: 214; CHECK: @ %bb.0: @ %entry 215; CHECK-NEXT: sub.w r2, r0, #256 216; CHECK-NEXT: vldrh.u32 q0, [r2] 217; CHECK-NEXT: vstrw.32 q0, [r1] 218; CHECK-NEXT: bx lr 219entry: 220 %z = getelementptr inbounds i8, i8* %x, i32 -256 221 %0 = bitcast i8* %z to <4 x i16>* 222 %1 = load <4 x i16>, <4 x i16>* %0, align 2 223 %2 = zext <4 x i16> %1 to <4 x i32> 224 %3 = bitcast i8* %y to <4 x i32>* 225 store <4 x i32> %2, <4 x i32>* %3, align 4 226 ret i8* %x 227} 228 229 230define i8* @ldrhs32_4(i8* %x, i8* %y) { 231; CHECK-LABEL: ldrhs32_4: 232; CHECK: @ %bb.0: @ %entry 233; CHECK-NEXT: vldrh.s32 q0, [r0, #4] 234; CHECK-NEXT: vstrw.32 q0, [r1] 235; CHECK-NEXT: bx lr 236entry: 237 %z = getelementptr inbounds i8, i8* %x, i32 4 238 %0 = bitcast i8* %z to <4 x i16>* 239 %1 = load <4 x i16>, <4 x i16>* %0, align 2 240 %2 = sext <4 x i16> %1 to <4 x i32> 241 %3 = bitcast i8* %y to <4 x i32>* 242 store <4 x i32> %2, <4 x i32>* %3, align 4 243 ret i8* %x 244} 245 246define i8* @ldrhs32_3(i8* %x, i8* %y) { 247; CHECK-LABEL: ldrhs32_3: 248; CHECK: @ %bb.0: @ %entry 249; CHECK-NEXT: adds r2, r0, #3 250; CHECK-NEXT: vldrh.s32 q0, [r2] 251; CHECK-NEXT: vstrw.32 q0, [r1] 252; CHECK-NEXT: bx lr 253entry: 254 %z = getelementptr inbounds i8, i8* %x, i32 3 
255 %0 = bitcast i8* %z to <4 x i16>* 256 %1 = load <4 x i16>, <4 x i16>* %0, align 2 257 %2 = sext <4 x i16> %1 to <4 x i32> 258 %3 = bitcast i8* %y to <4 x i32>* 259 store <4 x i32> %2, <4 x i32>* %3, align 4 260 ret i8* %x 261} 262 263define i8* @ldrhs32_2(i8* %x, i8* %y) { 264; CHECK-LABEL: ldrhs32_2: 265; CHECK: @ %bb.0: @ %entry 266; CHECK-NEXT: vldrh.s32 q0, [r0, #2] 267; CHECK-NEXT: vstrw.32 q0, [r1] 268; CHECK-NEXT: bx lr 269entry: 270 %z = getelementptr inbounds i8, i8* %x, i32 2 271 %0 = bitcast i8* %z to <4 x i16>* 272 %1 = load <4 x i16>, <4 x i16>* %0, align 2 273 %2 = sext <4 x i16> %1 to <4 x i32> 274 %3 = bitcast i8* %y to <4 x i32>* 275 store <4 x i32> %2, <4 x i32>* %3, align 4 276 ret i8* %x 277} 278 279define i8* @ldrhs32_254(i8* %x, i8* %y) { 280; CHECK-LABEL: ldrhs32_254: 281; CHECK: @ %bb.0: @ %entry 282; CHECK-NEXT: vldrh.s32 q0, [r0, #254] 283; CHECK-NEXT: vstrw.32 q0, [r1] 284; CHECK-NEXT: bx lr 285entry: 286 %z = getelementptr inbounds i8, i8* %x, i32 254 287 %0 = bitcast i8* %z to <4 x i16>* 288 %1 = load <4 x i16>, <4 x i16>* %0, align 2 289 %2 = sext <4 x i16> %1 to <4 x i32> 290 %3 = bitcast i8* %y to <4 x i32>* 291 store <4 x i32> %2, <4 x i32>* %3, align 4 292 ret i8* %x 293} 294 295define i8* @ldrhs32_256(i8* %x, i8* %y) { 296; CHECK-LABEL: ldrhs32_256: 297; CHECK: @ %bb.0: @ %entry 298; CHECK-NEXT: add.w r2, r0, #256 299; CHECK-NEXT: vldrh.s32 q0, [r2] 300; CHECK-NEXT: vstrw.32 q0, [r1] 301; CHECK-NEXT: bx lr 302entry: 303 %z = getelementptr inbounds i8, i8* %x, i32 256 304 %0 = bitcast i8* %z to <4 x i16>* 305 %1 = load <4 x i16>, <4 x i16>* %0, align 2 306 %2 = sext <4 x i16> %1 to <4 x i32> 307 %3 = bitcast i8* %y to <4 x i32>* 308 store <4 x i32> %2, <4 x i32>* %3, align 4 309 ret i8* %x 310} 311 312define i8* @ldrhs32_m254(i8* %x, i8* %y) { 313; CHECK-LABEL: ldrhs32_m254: 314; CHECK: @ %bb.0: @ %entry 315; CHECK-NEXT: vldrh.s32 q0, [r0, #-254] 316; CHECK-NEXT: vstrw.32 q0, [r1] 317; CHECK-NEXT: bx lr 318entry: 319 %z = 
getelementptr inbounds i8, i8* %x, i32 -254 320 %0 = bitcast i8* %z to <4 x i16>* 321 %1 = load <4 x i16>, <4 x i16>* %0, align 2 322 %2 = sext <4 x i16> %1 to <4 x i32> 323 %3 = bitcast i8* %y to <4 x i32>* 324 store <4 x i32> %2, <4 x i32>* %3, align 4 325 ret i8* %x 326} 327 328define i8* @ldrhs32_m256(i8* %x, i8* %y) { 329; CHECK-LABEL: ldrhs32_m256: 330; CHECK: @ %bb.0: @ %entry 331; CHECK-NEXT: sub.w r2, r0, #256 332; CHECK-NEXT: vldrh.s32 q0, [r2] 333; CHECK-NEXT: vstrw.32 q0, [r1] 334; CHECK-NEXT: bx lr 335entry: 336 %z = getelementptr inbounds i8, i8* %x, i32 -256 337 %0 = bitcast i8* %z to <4 x i16>* 338 %1 = load <4 x i16>, <4 x i16>* %0, align 2 339 %2 = sext <4 x i16> %1 to <4 x i32> 340 %3 = bitcast i8* %y to <4 x i32>* 341 store <4 x i32> %2, <4 x i32>* %3, align 4 342 ret i8* %x 343} 344 345 346define i8* @ldrhu16_4(i8* %x, i8* %y) { 347; CHECK-LABEL: ldrhu16_4: 348; CHECK: @ %bb.0: @ %entry 349; CHECK-NEXT: vldrh.u16 q0, [r0, #4] 350; CHECK-NEXT: vstrh.16 q0, [r1] 351; CHECK-NEXT: bx lr 352entry: 353 %z = getelementptr inbounds i8, i8* %x, i32 4 354 %0 = bitcast i8* %z to <8 x i16>* 355 %1 = load <8 x i16>, <8 x i16>* %0, align 2 356 %2 = bitcast i8* %y to <8 x i16>* 357 store <8 x i16> %1, <8 x i16>* %2, align 2 358 ret i8* %x 359} 360 361define i8* @ldrhu16_3(i8* %x, i8* %y) { 362; CHECK-LABEL: ldrhu16_3: 363; CHECK: @ %bb.0: @ %entry 364; CHECK-NEXT: adds r2, r0, #3 365; CHECK-NEXT: vldrh.u16 q0, [r2] 366; CHECK-NEXT: vstrh.16 q0, [r1] 367; CHECK-NEXT: bx lr 368entry: 369 %z = getelementptr inbounds i8, i8* %x, i32 3 370 %0 = bitcast i8* %z to <8 x i16>* 371 %1 = load <8 x i16>, <8 x i16>* %0, align 2 372 %2 = bitcast i8* %y to <8 x i16>* 373 store <8 x i16> %1, <8 x i16>* %2, align 2 374 ret i8* %x 375} 376 377define i8* @ldrhu16_2(i8* %x, i8* %y) { 378; CHECK-LABEL: ldrhu16_2: 379; CHECK: @ %bb.0: @ %entry 380; CHECK-NEXT: vldrh.u16 q0, [r0, #2] 381; CHECK-NEXT: vstrh.16 q0, [r1] 382; CHECK-NEXT: bx lr 383entry: 384 %z = getelementptr inbounds 
i8, i8* %x, i32 2 385 %0 = bitcast i8* %z to <8 x i16>* 386 %1 = load <8 x i16>, <8 x i16>* %0, align 2 387 %2 = bitcast i8* %y to <8 x i16>* 388 store <8 x i16> %1, <8 x i16>* %2, align 2 389 ret i8* %x 390} 391 392define i8* @ldrhu16_254(i8* %x, i8* %y) { 393; CHECK-LABEL: ldrhu16_254: 394; CHECK: @ %bb.0: @ %entry 395; CHECK-NEXT: vldrh.u16 q0, [r0, #254] 396; CHECK-NEXT: vstrh.16 q0, [r1] 397; CHECK-NEXT: bx lr 398entry: 399 %z = getelementptr inbounds i8, i8* %x, i32 254 400 %0 = bitcast i8* %z to <8 x i16>* 401 %1 = load <8 x i16>, <8 x i16>* %0, align 2 402 %2 = bitcast i8* %y to <8 x i16>* 403 store <8 x i16> %1, <8 x i16>* %2, align 2 404 ret i8* %x 405} 406 407define i8* @ldrhu16_256(i8* %x, i8* %y) { 408; CHECK-LABEL: ldrhu16_256: 409; CHECK: @ %bb.0: @ %entry 410; CHECK-NEXT: add.w r2, r0, #256 411; CHECK-NEXT: vldrh.u16 q0, [r2] 412; CHECK-NEXT: vstrh.16 q0, [r1] 413; CHECK-NEXT: bx lr 414entry: 415 %z = getelementptr inbounds i8, i8* %x, i32 256 416 %0 = bitcast i8* %z to <8 x i16>* 417 %1 = load <8 x i16>, <8 x i16>* %0, align 2 418 %2 = bitcast i8* %y to <8 x i16>* 419 store <8 x i16> %1, <8 x i16>* %2, align 2 420 ret i8* %x 421} 422 423define i8* @ldrhu16_m254(i8* %x, i8* %y) { 424; CHECK-LABEL: ldrhu16_m254: 425; CHECK: @ %bb.0: @ %entry 426; CHECK-NEXT: vldrh.u16 q0, [r0, #-254] 427; CHECK-NEXT: vstrh.16 q0, [r1] 428; CHECK-NEXT: bx lr 429entry: 430 %z = getelementptr inbounds i8, i8* %x, i32 -254 431 %0 = bitcast i8* %z to <8 x i16>* 432 %1 = load <8 x i16>, <8 x i16>* %0, align 2 433 %2 = bitcast i8* %y to <8 x i16>* 434 store <8 x i16> %1, <8 x i16>* %2, align 2 435 ret i8* %x 436} 437 438define i8* @ldrhu16_m256(i8* %x, i8* %y) { 439; CHECK-LABEL: ldrhu16_m256: 440; CHECK: @ %bb.0: @ %entry 441; CHECK-NEXT: sub.w r2, r0, #256 442; CHECK-NEXT: vldrh.u16 q0, [r2] 443; CHECK-NEXT: vstrh.16 q0, [r1] 444; CHECK-NEXT: bx lr 445entry: 446 %z = getelementptr inbounds i8, i8* %x, i32 -256 447 %0 = bitcast i8* %z to <8 x i16>* 448 %1 = load <8 x i16>, 
<8 x i16>* %0, align 2 449 %2 = bitcast i8* %y to <8 x i16>* 450 store <8 x i16> %1, <8 x i16>* %2, align 2 451 ret i8* %x 452} 453 454 455define i8* @ldrbu32_4(i8* %x, i8* %y) { 456; CHECK-LABEL: ldrbu32_4: 457; CHECK: @ %bb.0: @ %entry 458; CHECK-NEXT: vldrb.u32 q0, [r0, #4] 459; CHECK-NEXT: vstrw.32 q0, [r1] 460; CHECK-NEXT: bx lr 461entry: 462 %z = getelementptr inbounds i8, i8* %x, i32 4 463 %0 = bitcast i8* %z to <4 x i8>* 464 %1 = load <4 x i8>, <4 x i8>* %0, align 1 465 %2 = zext <4 x i8> %1 to <4 x i32> 466 %3 = bitcast i8* %y to <4 x i32>* 467 store <4 x i32> %2, <4 x i32>* %3, align 4 468 ret i8* %x 469} 470 471define i8* @ldrbu32_3(i8* %x, i8* %y) { 472; CHECK-LABEL: ldrbu32_3: 473; CHECK: @ %bb.0: @ %entry 474; CHECK-NEXT: vldrb.u32 q0, [r0, #3] 475; CHECK-NEXT: vstrw.32 q0, [r1] 476; CHECK-NEXT: bx lr 477entry: 478 %z = getelementptr inbounds i8, i8* %x, i32 3 479 %0 = bitcast i8* %z to <4 x i8>* 480 %1 = load <4 x i8>, <4 x i8>* %0, align 1 481 %2 = zext <4 x i8> %1 to <4 x i32> 482 %3 = bitcast i8* %y to <4 x i32>* 483 store <4 x i32> %2, <4 x i32>* %3, align 4 484 ret i8* %x 485} 486 487define i8* @ldrbu32_127(i8* %x, i8* %y) { 488; CHECK-LABEL: ldrbu32_127: 489; CHECK: @ %bb.0: @ %entry 490; CHECK-NEXT: vldrb.u32 q0, [r0, #127] 491; CHECK-NEXT: vstrw.32 q0, [r1] 492; CHECK-NEXT: bx lr 493entry: 494 %z = getelementptr inbounds i8, i8* %x, i32 127 495 %0 = bitcast i8* %z to <4 x i8>* 496 %1 = load <4 x i8>, <4 x i8>* %0, align 1 497 %2 = zext <4 x i8> %1 to <4 x i32> 498 %3 = bitcast i8* %y to <4 x i32>* 499 store <4 x i32> %2, <4 x i32>* %3, align 4 500 ret i8* %x 501} 502 503define i8* @ldrbu32_128(i8* %x, i8* %y) { 504; CHECK-LABEL: ldrbu32_128: 505; CHECK: @ %bb.0: @ %entry 506; CHECK-NEXT: add.w r2, r0, #128 507; CHECK-NEXT: vldrb.u32 q0, [r2] 508; CHECK-NEXT: vstrw.32 q0, [r1] 509; CHECK-NEXT: bx lr 510entry: 511 %z = getelementptr inbounds i8, i8* %x, i32 128 512 %0 = bitcast i8* %z to <4 x i8>* 513 %1 = load <4 x i8>, <4 x i8>* %0, align 1 
514 %2 = zext <4 x i8> %1 to <4 x i32> 515 %3 = bitcast i8* %y to <4 x i32>* 516 store <4 x i32> %2, <4 x i32>* %3, align 4 517 ret i8* %x 518} 519 520define i8* @ldrbu32_m127(i8* %x, i8* %y) { 521; CHECK-LABEL: ldrbu32_m127: 522; CHECK: @ %bb.0: @ %entry 523; CHECK-NEXT: vldrb.u32 q0, [r0, #-127] 524; CHECK-NEXT: vstrw.32 q0, [r1] 525; CHECK-NEXT: bx lr 526entry: 527 %z = getelementptr inbounds i8, i8* %x, i32 -127 528 %0 = bitcast i8* %z to <4 x i8>* 529 %1 = load <4 x i8>, <4 x i8>* %0, align 1 530 %2 = zext <4 x i8> %1 to <4 x i32> 531 %3 = bitcast i8* %y to <4 x i32>* 532 store <4 x i32> %2, <4 x i32>* %3, align 4 533 ret i8* %x 534} 535 536define i8* @ldrbu32_m128(i8* %x, i8* %y) { 537; CHECK-LABEL: ldrbu32_m128: 538; CHECK: @ %bb.0: @ %entry 539; CHECK-NEXT: sub.w r2, r0, #128 540; CHECK-NEXT: vldrb.u32 q0, [r2] 541; CHECK-NEXT: vstrw.32 q0, [r1] 542; CHECK-NEXT: bx lr 543entry: 544 %z = getelementptr inbounds i8, i8* %x, i32 -128 545 %0 = bitcast i8* %z to <4 x i8>* 546 %1 = load <4 x i8>, <4 x i8>* %0, align 1 547 %2 = zext <4 x i8> %1 to <4 x i32> 548 %3 = bitcast i8* %y to <4 x i32>* 549 store <4 x i32> %2, <4 x i32>* %3, align 4 550 ret i8* %x 551} 552 553 554define i8* @ldrbs32_4(i8* %x, i8* %y) { 555; CHECK-LABEL: ldrbs32_4: 556; CHECK: @ %bb.0: @ %entry 557; CHECK-NEXT: vldrb.s32 q0, [r0, #4] 558; CHECK-NEXT: vstrw.32 q0, [r1] 559; CHECK-NEXT: bx lr 560entry: 561 %z = getelementptr inbounds i8, i8* %x, i32 4 562 %0 = bitcast i8* %z to <4 x i8>* 563 %1 = load <4 x i8>, <4 x i8>* %0, align 1 564 %2 = sext <4 x i8> %1 to <4 x i32> 565 %3 = bitcast i8* %y to <4 x i32>* 566 store <4 x i32> %2, <4 x i32>* %3, align 4 567 ret i8* %x 568} 569 570define i8* @ldrbs32_3(i8* %x, i8* %y) { 571; CHECK-LABEL: ldrbs32_3: 572; CHECK: @ %bb.0: @ %entry 573; CHECK-NEXT: vldrb.s32 q0, [r0, #3] 574; CHECK-NEXT: vstrw.32 q0, [r1] 575; CHECK-NEXT: bx lr 576entry: 577 %z = getelementptr inbounds i8, i8* %x, i32 3 578 %0 = bitcast i8* %z to <4 x i8>* 579 %1 = load <4 x i8>, 
<4 x i8>* %0, align 1 580 %2 = sext <4 x i8> %1 to <4 x i32> 581 %3 = bitcast i8* %y to <4 x i32>* 582 store <4 x i32> %2, <4 x i32>* %3, align 4 583 ret i8* %x 584} 585 586define i8* @ldrbs32_127(i8* %x, i8* %y) { 587; CHECK-LABEL: ldrbs32_127: 588; CHECK: @ %bb.0: @ %entry 589; CHECK-NEXT: vldrb.s32 q0, [r0, #127] 590; CHECK-NEXT: vstrw.32 q0, [r1] 591; CHECK-NEXT: bx lr 592entry: 593 %z = getelementptr inbounds i8, i8* %x, i32 127 594 %0 = bitcast i8* %z to <4 x i8>* 595 %1 = load <4 x i8>, <4 x i8>* %0, align 1 596 %2 = sext <4 x i8> %1 to <4 x i32> 597 %3 = bitcast i8* %y to <4 x i32>* 598 store <4 x i32> %2, <4 x i32>* %3, align 4 599 ret i8* %x 600} 601 602define i8* @ldrbs32_128(i8* %x, i8* %y) { 603; CHECK-LABEL: ldrbs32_128: 604; CHECK: @ %bb.0: @ %entry 605; CHECK-NEXT: add.w r2, r0, #128 606; CHECK-NEXT: vldrb.s32 q0, [r2] 607; CHECK-NEXT: vstrw.32 q0, [r1] 608; CHECK-NEXT: bx lr 609entry: 610 %z = getelementptr inbounds i8, i8* %x, i32 128 611 %0 = bitcast i8* %z to <4 x i8>* 612 %1 = load <4 x i8>, <4 x i8>* %0, align 1 613 %2 = sext <4 x i8> %1 to <4 x i32> 614 %3 = bitcast i8* %y to <4 x i32>* 615 store <4 x i32> %2, <4 x i32>* %3, align 4 616 ret i8* %x 617} 618 619define i8* @ldrbs32_m127(i8* %x, i8* %y) { 620; CHECK-LABEL: ldrbs32_m127: 621; CHECK: @ %bb.0: @ %entry 622; CHECK-NEXT: vldrb.s32 q0, [r0, #-127] 623; CHECK-NEXT: vstrw.32 q0, [r1] 624; CHECK-NEXT: bx lr 625entry: 626 %z = getelementptr inbounds i8, i8* %x, i32 -127 627 %0 = bitcast i8* %z to <4 x i8>* 628 %1 = load <4 x i8>, <4 x i8>* %0, align 1 629 %2 = sext <4 x i8> %1 to <4 x i32> 630 %3 = bitcast i8* %y to <4 x i32>* 631 store <4 x i32> %2, <4 x i32>* %3, align 4 632 ret i8* %x 633} 634 635define i8* @ldrbs32_m128(i8* %x, i8* %y) { 636; CHECK-LABEL: ldrbs32_m128: 637; CHECK: @ %bb.0: @ %entry 638; CHECK-NEXT: sub.w r2, r0, #128 639; CHECK-NEXT: vldrb.s32 q0, [r2] 640; CHECK-NEXT: vstrw.32 q0, [r1] 641; CHECK-NEXT: bx lr 642entry: 643 %z = getelementptr inbounds i8, i8* %x, i32 
-128 644 %0 = bitcast i8* %z to <4 x i8>* 645 %1 = load <4 x i8>, <4 x i8>* %0, align 1 646 %2 = sext <4 x i8> %1 to <4 x i32> 647 %3 = bitcast i8* %y to <4 x i32>* 648 store <4 x i32> %2, <4 x i32>* %3, align 4 649 ret i8* %x 650} 651 652 653define i8* @ldrbu16_4(i8* %x, i8* %y) { 654; CHECK-LABEL: ldrbu16_4: 655; CHECK: @ %bb.0: @ %entry 656; CHECK-NEXT: vldrb.u16 q0, [r0, #4] 657; CHECK-NEXT: vstrh.16 q0, [r1] 658; CHECK-NEXT: bx lr 659entry: 660 %z = getelementptr inbounds i8, i8* %x, i32 4 661 %0 = bitcast i8* %z to <8 x i8>* 662 %1 = load <8 x i8>, <8 x i8>* %0, align 1 663 %2 = zext <8 x i8> %1 to <8 x i16> 664 %3 = bitcast i8* %y to <8 x i16>* 665 store <8 x i16> %2, <8 x i16>* %3, align 2 666 ret i8* %x 667} 668 669define i8* @ldrbu16_3(i8* %x, i8* %y) { 670; CHECK-LABEL: ldrbu16_3: 671; CHECK: @ %bb.0: @ %entry 672; CHECK-NEXT: vldrb.u16 q0, [r0, #3] 673; CHECK-NEXT: vstrh.16 q0, [r1] 674; CHECK-NEXT: bx lr 675entry: 676 %z = getelementptr inbounds i8, i8* %x, i32 3 677 %0 = bitcast i8* %z to <8 x i8>* 678 %1 = load <8 x i8>, <8 x i8>* %0, align 1 679 %2 = zext <8 x i8> %1 to <8 x i16> 680 %3 = bitcast i8* %y to <8 x i16>* 681 store <8 x i16> %2, <8 x i16>* %3, align 2 682 ret i8* %x 683} 684 685define i8* @ldrbu16_127(i8* %x, i8* %y) { 686; CHECK-LABEL: ldrbu16_127: 687; CHECK: @ %bb.0: @ %entry 688; CHECK-NEXT: vldrb.u16 q0, [r0, #127] 689; CHECK-NEXT: vstrh.16 q0, [r1] 690; CHECK-NEXT: bx lr 691entry: 692 %z = getelementptr inbounds i8, i8* %x, i32 127 693 %0 = bitcast i8* %z to <8 x i8>* 694 %1 = load <8 x i8>, <8 x i8>* %0, align 1 695 %2 = zext <8 x i8> %1 to <8 x i16> 696 %3 = bitcast i8* %y to <8 x i16>* 697 store <8 x i16> %2, <8 x i16>* %3, align 2 698 ret i8* %x 699} 700 701define i8* @ldrbu16_128(i8* %x, i8* %y) { 702; CHECK-LABEL: ldrbu16_128: 703; CHECK: @ %bb.0: @ %entry 704; CHECK-NEXT: add.w r2, r0, #128 705; CHECK-NEXT: vldrb.u16 q0, [r2] 706; CHECK-NEXT: vstrh.16 q0, [r1] 707; CHECK-NEXT: bx lr 708entry: 709 %z = getelementptr inbounds 
i8, i8* %x, i32 128 710 %0 = bitcast i8* %z to <8 x i8>* 711 %1 = load <8 x i8>, <8 x i8>* %0, align 1 712 %2 = zext <8 x i8> %1 to <8 x i16> 713 %3 = bitcast i8* %y to <8 x i16>* 714 store <8 x i16> %2, <8 x i16>* %3, align 2 715 ret i8* %x 716} 717 718define i8* @ldrbu16_m127(i8* %x, i8* %y) { 719; CHECK-LABEL: ldrbu16_m127: 720; CHECK: @ %bb.0: @ %entry 721; CHECK-NEXT: vldrb.u16 q0, [r0, #-127] 722; CHECK-NEXT: vstrh.16 q0, [r1] 723; CHECK-NEXT: bx lr 724entry: 725 %z = getelementptr inbounds i8, i8* %x, i32 -127 726 %0 = bitcast i8* %z to <8 x i8>* 727 %1 = load <8 x i8>, <8 x i8>* %0, align 1 728 %2 = zext <8 x i8> %1 to <8 x i16> 729 %3 = bitcast i8* %y to <8 x i16>* 730 store <8 x i16> %2, <8 x i16>* %3, align 2 731 ret i8* %x 732} 733 734define i8* @ldrbu16_m128(i8* %x, i8* %y) { 735; CHECK-LABEL: ldrbu16_m128: 736; CHECK: @ %bb.0: @ %entry 737; CHECK-NEXT: sub.w r2, r0, #128 738; CHECK-NEXT: vldrb.u16 q0, [r2] 739; CHECK-NEXT: vstrh.16 q0, [r1] 740; CHECK-NEXT: bx lr 741entry: 742 %z = getelementptr inbounds i8, i8* %x, i32 -128 743 %0 = bitcast i8* %z to <8 x i8>* 744 %1 = load <8 x i8>, <8 x i8>* %0, align 1 745 %2 = zext <8 x i8> %1 to <8 x i16> 746 %3 = bitcast i8* %y to <8 x i16>* 747 store <8 x i16> %2, <8 x i16>* %3, align 2 748 ret i8* %x 749} 750 751 752define i8* @ldrbs16_4(i8* %x, i8* %y) { 753; CHECK-LABEL: ldrbs16_4: 754; CHECK: @ %bb.0: @ %entry 755; CHECK-NEXT: vldrb.s16 q0, [r0, #4] 756; CHECK-NEXT: vstrh.16 q0, [r1] 757; CHECK-NEXT: bx lr 758entry: 759 %z = getelementptr inbounds i8, i8* %x, i32 4 760 %0 = bitcast i8* %z to <8 x i8>* 761 %1 = load <8 x i8>, <8 x i8>* %0, align 1 762 %2 = sext <8 x i8> %1 to <8 x i16> 763 %3 = bitcast i8* %y to <8 x i16>* 764 store <8 x i16> %2, <8 x i16>* %3, align 2 765 ret i8* %x 766} 767 768define i8* @ldrbs16_3(i8* %x, i8* %y) { 769; CHECK-LABEL: ldrbs16_3: 770; CHECK: @ %bb.0: @ %entry 771; CHECK-NEXT: vldrb.s16 q0, [r0, #3] 772; CHECK-NEXT: vstrh.16 q0, [r1] 773; CHECK-NEXT: bx lr 774entry: 775 %z = 
getelementptr inbounds i8, i8* %x, i32 3 776 %0 = bitcast i8* %z to <8 x i8>* 777 %1 = load <8 x i8>, <8 x i8>* %0, align 1 778 %2 = sext <8 x i8> %1 to <8 x i16> 779 %3 = bitcast i8* %y to <8 x i16>* 780 store <8 x i16> %2, <8 x i16>* %3, align 2 781 ret i8* %x 782} 783 784define i8* @ldrbs16_127(i8* %x, i8* %y) { 785; CHECK-LABEL: ldrbs16_127: 786; CHECK: @ %bb.0: @ %entry 787; CHECK-NEXT: vldrb.s16 q0, [r0, #127] 788; CHECK-NEXT: vstrh.16 q0, [r1] 789; CHECK-NEXT: bx lr 790entry: 791 %z = getelementptr inbounds i8, i8* %x, i32 127 792 %0 = bitcast i8* %z to <8 x i8>* 793 %1 = load <8 x i8>, <8 x i8>* %0, align 1 794 %2 = sext <8 x i8> %1 to <8 x i16> 795 %3 = bitcast i8* %y to <8 x i16>* 796 store <8 x i16> %2, <8 x i16>* %3, align 2 797 ret i8* %x 798} 799 800define i8* @ldrbs16_128(i8* %x, i8* %y) { 801; CHECK-LABEL: ldrbs16_128: 802; CHECK: @ %bb.0: @ %entry 803; CHECK-NEXT: add.w r2, r0, #128 804; CHECK-NEXT: vldrb.s16 q0, [r2] 805; CHECK-NEXT: vstrh.16 q0, [r1] 806; CHECK-NEXT: bx lr 807entry: 808 %z = getelementptr inbounds i8, i8* %x, i32 128 809 %0 = bitcast i8* %z to <8 x i8>* 810 %1 = load <8 x i8>, <8 x i8>* %0, align 1 811 %2 = sext <8 x i8> %1 to <8 x i16> 812 %3 = bitcast i8* %y to <8 x i16>* 813 store <8 x i16> %2, <8 x i16>* %3, align 2 814 ret i8* %x 815} 816 817define i8* @ldrbs16_m127(i8* %x, i8* %y) { 818; CHECK-LABEL: ldrbs16_m127: 819; CHECK: @ %bb.0: @ %entry 820; CHECK-NEXT: vldrb.s16 q0, [r0, #-127] 821; CHECK-NEXT: vstrh.16 q0, [r1] 822; CHECK-NEXT: bx lr 823entry: 824 %z = getelementptr inbounds i8, i8* %x, i32 -127 825 %0 = bitcast i8* %z to <8 x i8>* 826 %1 = load <8 x i8>, <8 x i8>* %0, align 1 827 %2 = sext <8 x i8> %1 to <8 x i16> 828 %3 = bitcast i8* %y to <8 x i16>* 829 store <8 x i16> %2, <8 x i16>* %3, align 2 830 ret i8* %x 831} 832 833define i8* @ldrbs16_m128(i8* %x, i8* %y) { 834; CHECK-LABEL: ldrbs16_m128: 835; CHECK: @ %bb.0: @ %entry 836; CHECK-NEXT: sub.w r2, r0, #128 837; CHECK-NEXT: vldrb.s16 q0, [r2] 838; CHECK-NEXT: 
vstrh.16 q0, [r1] 839; CHECK-NEXT: bx lr 840entry: 841 %z = getelementptr inbounds i8, i8* %x, i32 -128 842 %0 = bitcast i8* %z to <8 x i8>* 843 %1 = load <8 x i8>, <8 x i8>* %0, align 1 844 %2 = sext <8 x i8> %1 to <8 x i16> 845 %3 = bitcast i8* %y to <8 x i16>* 846 store <8 x i16> %2, <8 x i16>* %3, align 2 847 ret i8* %x 848} 849 850 851define i8* @ldrbu8_4(i8* %x, i8* %y) { 852; CHECK-LABEL: ldrbu8_4: 853; CHECK: @ %bb.0: @ %entry 854; CHECK-NEXT: vldrb.u8 q0, [r0, #4] 855; CHECK-NEXT: vstrb.8 q0, [r1] 856; CHECK-NEXT: bx lr 857entry: 858 %z = getelementptr inbounds i8, i8* %x, i32 4 859 %0 = bitcast i8* %z to <16 x i8>* 860 %1 = load <16 x i8>, <16 x i8>* %0, align 1 861 %2 = bitcast i8* %y to <16 x i8>* 862 store <16 x i8> %1, <16 x i8>* %2, align 1 863 ret i8* %x 864} 865 866define i8* @ldrbu8_3(i8* %x, i8* %y) { 867; CHECK-LABEL: ldrbu8_3: 868; CHECK: @ %bb.0: @ %entry 869; CHECK-NEXT: vldrb.u8 q0, [r0, #3] 870; CHECK-NEXT: vstrb.8 q0, [r1] 871; CHECK-NEXT: bx lr 872entry: 873 %z = getelementptr inbounds i8, i8* %x, i32 3 874 %0 = bitcast i8* %z to <16 x i8>* 875 %1 = load <16 x i8>, <16 x i8>* %0, align 1 876 %2 = bitcast i8* %y to <16 x i8>* 877 store <16 x i8> %1, <16 x i8>* %2, align 1 878 ret i8* %x 879} 880 881define i8* @ldrbu8_127(i8* %x, i8* %y) { 882; CHECK-LABEL: ldrbu8_127: 883; CHECK: @ %bb.0: @ %entry 884; CHECK-NEXT: vldrb.u8 q0, [r0, #127] 885; CHECK-NEXT: vstrb.8 q0, [r1] 886; CHECK-NEXT: bx lr 887entry: 888 %z = getelementptr inbounds i8, i8* %x, i32 127 889 %0 = bitcast i8* %z to <16 x i8>* 890 %1 = load <16 x i8>, <16 x i8>* %0, align 1 891 %2 = bitcast i8* %y to <16 x i8>* 892 store <16 x i8> %1, <16 x i8>* %2, align 1 893 ret i8* %x 894} 895 896define i8* @ldrbu8_128(i8* %x, i8* %y) { 897; CHECK-LABEL: ldrbu8_128: 898; CHECK: @ %bb.0: @ %entry 899; CHECK-NEXT: add.w r2, r0, #128 900; CHECK-NEXT: vldrb.u8 q0, [r2] 901; CHECK-NEXT: vstrb.8 q0, [r1] 902; CHECK-NEXT: bx lr 903entry: 904 %z = getelementptr inbounds i8, i8* %x, i32 128 905 
%0 = bitcast i8* %z to <16 x i8>* 906 %1 = load <16 x i8>, <16 x i8>* %0, align 1 907 %2 = bitcast i8* %y to <16 x i8>* 908 store <16 x i8> %1, <16 x i8>* %2, align 1 909 ret i8* %x 910} 911 912define i8* @ldrbu8_m127(i8* %x, i8* %y) { 913; CHECK-LABEL: ldrbu8_m127: 914; CHECK: @ %bb.0: @ %entry 915; CHECK-NEXT: vldrb.u8 q0, [r0, #-127] 916; CHECK-NEXT: vstrb.8 q0, [r1] 917; CHECK-NEXT: bx lr 918entry: 919 %z = getelementptr inbounds i8, i8* %x, i32 -127 920 %0 = bitcast i8* %z to <16 x i8>* 921 %1 = load <16 x i8>, <16 x i8>* %0, align 1 922 %2 = bitcast i8* %y to <16 x i8>* 923 store <16 x i8> %1, <16 x i8>* %2, align 1 924 ret i8* %x 925} 926 927define i8* @ldrbu8_m128(i8* %x, i8* %y) { 928; CHECK-LABEL: ldrbu8_m128: 929; CHECK: @ %bb.0: @ %entry 930; CHECK-NEXT: sub.w r2, r0, #128 931; CHECK-NEXT: vldrb.u8 q0, [r2] 932; CHECK-NEXT: vstrb.8 q0, [r1] 933; CHECK-NEXT: bx lr 934entry: 935 %z = getelementptr inbounds i8, i8* %x, i32 -128 936 %0 = bitcast i8* %z to <16 x i8>* 937 %1 = load <16 x i8>, <16 x i8>* %0, align 1 938 %2 = bitcast i8* %y to <16 x i8>* 939 store <16 x i8> %1, <16 x i8>* %2, align 1 940 ret i8* %x 941} 942 943 944define i8* @ldrwf32_4(i8* %x, i8* %y) { 945; CHECK-LABEL: ldrwf32_4: 946; CHECK: @ %bb.0: @ %entry 947; CHECK-NEXT: vldrw.u32 q0, [r0, #4] 948; CHECK-NEXT: vstrw.32 q0, [r1] 949; CHECK-NEXT: bx lr 950entry: 951 %z = getelementptr inbounds i8, i8* %x, i32 4 952 %0 = bitcast i8* %z to <4 x float>* 953 %1 = load <4 x float>, <4 x float>* %0, align 4 954 %2 = bitcast i8* %y to <4 x float>* 955 store <4 x float> %1, <4 x float>* %2, align 4 956 ret i8* %x 957} 958 959define i8* @ldrwf16_4(i8* %x, i8* %y) { 960; CHECK-LABEL: ldrwf16_4: 961; CHECK: @ %bb.0: @ %entry 962; CHECK-NEXT: vldrh.u16 q0, [r0, #4] 963; CHECK-NEXT: vstrh.16 q0, [r1] 964; CHECK-NEXT: bx lr 965entry: 966 %z = getelementptr inbounds i8, i8* %x, i32 4 967 %0 = bitcast i8* %z to <8 x half>* 968 %1 = load <8 x half>, <8 x half>* %0, align 2 969 %2 = bitcast i8* %y to <8 x 
half>*
  store <8 x half> %1, <8 x half>* %2, align 2
  ret i8* %x
}

; ---------------------------------------------------------------------------
; Loads from %x plus a constant byte offset where the load's alignment does
; not match the natural alignment of the vector element type.
; ---------------------------------------------------------------------------

; <4 x i32> load from %x+3 with align 1: a byte vector load is expected, with
; an extra vrev32.8 on big-endian before the word store.
define i8* @ldrwi32_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrwi32_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-LE-NEXT: vstrw.32 q0, [r1]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: ldrwi32_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vstrw.32 q0, [r1]
; CHECK-BE-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %x, i32 3
  %src = bitcast i8* %addr to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %src, align 1
  %dst = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %vec, <4 x i32>* %dst, align 4
  ret i8* %x
}

; <8 x i16> load from %x+3 with align 1: a byte vector load is expected, with
; an extra vrev16.8 on big-endian before the halfword store.
define i8* @ldrhi16_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrhi16_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-LE-NEXT: vstrh.16 q0, [r1]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: ldrhi16_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT: vrev16.8 q0, q0
; CHECK-BE-NEXT: vstrh.16 q0, [r1]
; CHECK-BE-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %x, i32 3
  %src = bitcast i8* %addr to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %src, align 1
  %dst = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %vec, <8 x i16>* %dst, align 2
  ret i8* %x
}

; Sign-extending <4 x i16> -> <4 x i32> load from %x+3 with align 1: the
; expected code copies the 8 bytes into a stack slot with two ldr.w and then
; performs the widening vldrh.s32 from that slot.
define i8* @ldrhi32_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhi32_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: ldr.w r3, [r0, #7]
; CHECK-NEXT: ldr.w r2, [r0, #3]
; CHECK-NEXT: strd r2, r3, [sp]
; CHECK-NEXT: mov r2, sp
; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %x, i32 3
  %src = bitcast i8* %addr to <4 x i16>*
  %narrow = load <4 x i16>, <4 x i16>* %src, align 1
  %dst = bitcast i8* %y to <4 x i32>*
  %wide = sext <4 x i16> %narrow to <4 x i32>
  store <4 x i32> %wide, <4 x i32>* %dst, align 4
  ret i8* %x
}

; <4 x float> load from %x+3 with align 1: same expected code as the
; <4 x i32> align-1 load above.
define i8* @ldrf32_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrf32_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-LE-NEXT: vstrw.32 q0, [r1]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: ldrf32_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vstrw.32 q0, [r1]
; CHECK-BE-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %x, i32 3
  %src = bitcast i8* %addr to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %src, align 1
  %dst = bitcast i8* %y to <4 x float>*
  store <4 x float> %vec, <4 x float>* %dst, align 4
  ret i8* %x
}

; <8 x half> load from %x+3 with align 1: same expected code as the
; <8 x i16> align-1 load above.
define i8* @ldrf16_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrf16_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-LE-NEXT: vstrh.16 q0, [r1]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: ldrf16_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT: vrev16.8 q0, q0
; CHECK-BE-NEXT: vstrh.16 q0, [r1]
; CHECK-BE-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %x, i32 3
  %src = bitcast i8* %addr to <8 x half>*
  %vec = load <8 x half>, <8 x half>* %src, align 1
  %dst = bitcast i8* %y to <8 x half>*
  store <8 x half> %vec, <8 x half>* %dst, align 2
  ret i8* %x
}

; <8 x i16> load from %x+4 marked align 8: little-endian is expected to use
; vldrw.u32, while big-endian keeps the element-sized vldrh.u16.
define i8* @ldrh16_align8(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrh16_align8:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4]
; CHECK-LE-NEXT: vstrh.16 q0, [r1]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: ldrh16_align8:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4]
; CHECK-BE-NEXT: vstrh.16 q0, [r1]
; CHECK-BE-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %x, i32 4
  %src = bitcast i8* %addr to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %src, align 8
  %dst = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %vec, <8 x i16>* %dst, align 2
  ret i8* %x
}


; ---------------------------------------------------------------------------
; Stores to %y plus a constant byte offset. In every test below the vector is
; first loaded from %x, and %y is returned unchanged.
; ---------------------------------------------------------------------------

; --- <4 x i32> stores (vstrw.32) ---

; Offset +4: expected to fold into the store's immediate.
define i8* @strw32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 4
  %src = bitcast i8* %x to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %src, align 4
  %dst = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %vec, <4 x i32>* %dst, align 4
  ret i8* %y
}

; Offset +3: expected output forms the address with a separate adds.
define i8* @strw32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 3
  %src = bitcast i8* %x to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %src, align 4
  %dst = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %vec, <4 x i32>* %dst, align 4
  ret i8* %y
}

; Offset -4: expected to fold as a negative immediate.
define i8* @strw32_m4(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_m4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #-4]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -4
  %src = bitcast i8* %x to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %src, align 4
  %dst = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %vec, <4 x i32>* %dst, align 4
  ret i8* %y
}

; Offset +508: still expected to fold into the immediate.
define i8* @strw32_508(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #508]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 508
  %src = bitcast i8* %x to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %src, align 4
  %dst = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %vec, <4 x i32>* %dst, align 4
  ret i8* %y
}

; Offset +512: expected output needs a separate add.w.
define i8* @strw32_512(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_512:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #512
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 512
  %src = bitcast i8* %x to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %src, align 4
  %dst = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %vec, <4 x i32>* %dst, align 4
  ret i8* %y
}

; Offset -508: expected to fold as a negative immediate.
define i8* @strw32_m508(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_m508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #-508]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -508
  %src = bitcast i8* %x to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %src, align 4
  %dst = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %vec, <4 x i32>* %dst, align 4
  ret i8* %y
}

; Offset -512: expected output needs a separate sub.w.
define i8* @strw32_m512(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_m512:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: sub.w r1, r0, #512
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -512
  %src = bitcast i8* %x to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %src, align 4
  %dst = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %vec, <4 x i32>* %dst, align 4
  ret i8* %y
}


; --- <4 x i16> stores (vstrh.32) ---

; Offset +4: expected to fold into the store's immediate.
define i8* @strh32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 4
  %src = bitcast i8* %x to <4 x i16>*
  %vec = load <4 x i16>, <4 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <4 x i16>*
  store <4 x i16> %vec, <4 x i16>* %dst, align 2
  ret i8* %y
}

; Offset +3: expected output forms the address with a separate adds.
define i8* @strh32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 3
  %src = bitcast i8* %x to <4 x i16>*
  %vec = load <4 x i16>, <4 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <4 x i16>*
  store <4 x i16> %vec, <4 x i16>* %dst, align 2
  ret i8* %y
}

; Offset +2: expected to fold into the immediate.
define i8* @strh32_2(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0, #2]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 2
  %src = bitcast i8* %x to <4 x i16>*
  %vec = load <4 x i16>, <4 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <4 x i16>*
  store <4 x i16> %vec, <4 x i16>* %dst, align 2
  ret i8* %y
}

; Offset +254: still expected to fold into the immediate.
define i8* @strh32_254(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0, #254]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 254
  %src = bitcast i8* %x to <4 x i16>*
  %vec = load <4 x i16>, <4 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <4 x i16>*
  store <4 x i16> %vec, <4 x i16>* %dst, align 2
  ret i8* %y
}

; Offset +256: expected output needs a separate add.w.
define i8* @strh32_256(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #256
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 256
  %src = bitcast i8* %x to <4 x i16>*
  %vec = load <4 x i16>, <4 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <4 x i16>*
  store <4 x i16> %vec, <4 x i16>* %dst, align 2
  ret i8* %y
}

; Offset -254: expected to fold as a negative immediate.
define i8* @strh32_m254(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_m254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0, #-254]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -254
  %src = bitcast i8* %x to <4 x i16>*
  %vec = load <4 x i16>, <4 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <4 x i16>*
  store <4 x i16> %vec, <4 x i16>* %dst, align 2
  ret i8* %y
}

; Offset -256: expected output needs a separate sub.w.
define i8* @strh32_m256(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_m256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: sub.w r1, r0, #256
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -256
  %src = bitcast i8* %x to <4 x i16>*
  %vec = load <4 x i16>, <4 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <4 x i16>*
  store <4 x i16> %vec, <4 x i16>* %dst, align 2
  ret i8* %y
}


; --- <8 x i16> stores (vstrh.16) ---

; Offset +4: expected to fold into the store's immediate.
define i8* @strh16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 4
  %src = bitcast i8* %x to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <8 x i16>*
  store <8 x i16> %vec, <8 x i16>* %dst, align 2
  ret i8* %y
}

; Offset +3: expected output forms the address with a separate adds.
define i8* @strh16_3(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 3
  %src = bitcast i8* %x to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <8 x i16>*
  store <8 x i16> %vec, <8 x i16>* %dst, align 2
  ret i8* %y
}

; Offset +2: expected to fold into the immediate.
define i8* @strh16_2(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0, #2]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 2
  %src = bitcast i8* %x to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <8 x i16>*
  store <8 x i16> %vec, <8 x i16>* %dst, align 2
  ret i8* %y
}

; Offset +254: still expected to fold into the immediate.
define i8* @strh16_254(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0, #254]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 254
  %src = bitcast i8* %x to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <8 x i16>*
  store <8 x i16> %vec, <8 x i16>* %dst, align 2
  ret i8* %y
}

; Offset +256: expected output needs a separate add.w.
define i8* @strh16_256(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #256
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 256
  %src = bitcast i8* %x to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <8 x i16>*
  store <8 x i16> %vec, <8 x i16>* %dst, align 2
  ret i8* %y
}

; Offset -254: expected to fold as a negative immediate.
define i8* @strh16_m254(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_m254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0, #-254]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -254
  %src = bitcast i8* %x to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <8 x i16>*
  store <8 x i16> %vec, <8 x i16>* %dst, align 2
  ret i8* %y
}

; Offset -256: expected output needs a separate sub.w.
define i8* @strh16_m256(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_m256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: sub.w r1, r0, #256
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -256
  %src = bitcast i8* %x to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <8 x i16>*
  store <8 x i16> %vec, <8 x i16>* %dst, align 2
  ret i8* %y
}


; --- <4 x i8> stores (vstrb.32) ---

; Offset +4: expected to fold into the store's immediate.
define i8* @strb32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 4
  %src = bitcast i8* %x to <4 x i8>*
  %vec = load <4 x i8>, <4 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <4 x i8>*
  store <4 x i8> %vec, <4 x i8>* %dst, align 1
  ret i8* %y
}

; Offset +3: expected to fold into the immediate.
define i8* @strb32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0, #3]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 3
  %src = bitcast i8* %x to <4 x i8>*
  %vec = load <4 x i8>, <4 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <4 x i8>*
  store <4 x i8> %vec, <4 x i8>* %dst, align 1
  ret i8* %y
}

; Offset +127: still expected to fold into the immediate.
define i8* @strb32_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0, #127]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 127
  %src = bitcast i8* %x to <4 x i8>*
  %vec = load <4 x i8>, <4 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <4 x i8>*
  store <4 x i8> %vec, <4 x i8>* %dst, align 1
  ret i8* %y
}

; Offset +128: expected output needs a separate add.w.
define i8* @strb32_128(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #128
; CHECK-NEXT: vstrb.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 128
  %src = bitcast i8* %x to <4 x i8>*
  %vec = load <4 x i8>, <4 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <4 x i8>*
  store <4 x i8> %vec, <4 x i8>* %dst, align 1
  ret i8* %y
}

; Offset -127: expected to fold as a negative immediate.
define i8* @strb32_m127(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_m127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0, #-127]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -127
  %src = bitcast i8* %x to <4 x i8>*
  %vec = load <4 x i8>, <4 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <4 x i8>*
  store <4 x i8> %vec, <4 x i8>* %dst, align 1
  ret i8* %y
}

; Offset -128: expected output needs a separate sub.w.
define i8* @strb32_m128(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_m128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: sub.w r1, r0, #128
; CHECK-NEXT: vstrb.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -128
  %src = bitcast i8* %x to <4 x i8>*
  %vec = load <4 x i8>, <4 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <4 x i8>*
  store <4 x i8> %vec, <4 x i8>* %dst, align 1
  ret i8* %y
}


; --- <8 x i8> stores (vstrb.16) ---

; Offset +4: expected to fold into the store's immediate.
define i8* @strb16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 4
  %src = bitcast i8* %x to <8 x i8>*
  %vec = load <8 x i8>, <8 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <8 x i8>*
  store <8 x i8> %vec, <8 x i8>* %dst, align 1
  ret i8* %y
}

; Offset +3: expected to fold into the immediate.
define i8* @strb16_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0, #3]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 3
  %src = bitcast i8* %x to <8 x i8>*
  %vec = load <8 x i8>, <8 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <8 x i8>*
  store <8 x i8> %vec, <8 x i8>* %dst, align 1
  ret i8* %y
}

; Offset +127: still expected to fold into the immediate.
define i8* @strb16_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0, #127]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 127
  %src = bitcast i8* %x to <8 x i8>*
  %vec = load <8 x i8>, <8 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <8 x i8>*
  store <8 x i8> %vec, <8 x i8>* %dst, align 1
  ret i8* %y
}

; Offset +128: expected output needs a separate add.w.
define i8* @strb16_128(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #128
; CHECK-NEXT: vstrb.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 128
  %src = bitcast i8* %x to <8 x i8>*
  %vec = load <8 x i8>, <8 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <8 x i8>*
  store <8 x i8> %vec, <8 x i8>* %dst, align 1
  ret i8* %y
}

; Offset -127: expected to fold as a negative immediate.
define i8* @strb16_m127(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_m127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0, #-127]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -127
  %src = bitcast i8* %x to <8 x i8>*
  %vec = load <8 x i8>, <8 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <8 x i8>*
  store <8 x i8> %vec, <8 x i8>* %dst, align 1
  ret i8* %y
}

; Offset -128: expected output needs a separate sub.w.
define i8* @strb16_m128(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_m128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: sub.w r1, r0, #128
; CHECK-NEXT: vstrb.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -128
  %src = bitcast i8* %x to <8 x i8>*
  %vec = load <8 x i8>, <8 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <8 x i8>*
  store <8 x i8> %vec, <8 x i8>* %dst, align 1
  ret i8* %y
}


; --- <16 x i8> stores (vstrb.8) ---

; Offset +4: expected to fold into the store's immediate.
define i8* @strb8_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 4
  %src = bitcast i8* %x to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %vec, <16 x i8>* %dst, align 1
  ret i8* %y
}

; Offset +3: expected to fold into the immediate.
define i8* @strb8_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 3
  %src = bitcast i8* %x to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %vec, <16 x i8>* %dst, align 1
  ret i8* %y
}

; Offset +127: still expected to fold into the immediate.
define i8* @strb8_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0, #127]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 127
  %src = bitcast i8* %x to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %vec, <16 x i8>* %dst, align 1
  ret i8* %y
}

; Offset +128: expected output needs a separate add.w.
define i8* @strb8_128(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #128
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 128
  %src = bitcast i8* %x to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %vec, <16 x i8>* %dst, align 1
  ret i8* %y
}

; Offset -127: expected to fold as a negative immediate.
define i8* @strb8_m127(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_m127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0, #-127]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -127
  %src = bitcast i8* %x to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %vec, <16 x i8>* %dst, align 1
  ret i8* %y
}

; Offset -128: expected output needs a separate sub.w.
define i8* @strb8_m128(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_m128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r1]
; CHECK-NEXT: sub.w r1, r0, #128
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 -128
  %src = bitcast i8* %x to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %src, align 1
  %dst = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %vec, <16 x i8>* %dst, align 1
  ret i8* %y
}


; --- Floating-point element types ---

; <4 x float> store at offset +4: expected code matches the <4 x i32> case.
define i8* @strf32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strf32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 4
  %src = bitcast i8* %x to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %src, align 4
  %dst = bitcast i8* %addr to <4 x float>*
  store <4 x float> %vec, <4 x float>* %dst, align 4
  ret i8* %y
}

; <8 x half> store at offset +4: expected code matches the <8 x i16> case.
define i8* @strf16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strf16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 4
  %src = bitcast i8* %x to <8 x half>*
  %vec = load <8 x half>, <8 x half>* %src, align 2
  %dst = bitcast i8* %addr to <8 x half>*
  store <8 x half> %vec, <8 x half>* %dst, align 2
  ret i8* %y
}

; --- Stores whose alignment differs from the element's natural alignment ---

; <4 x i32> store to %y+3 with align 1: a byte vector store is expected,
; preceded by vrev32.8 on big-endian.
define i8* @strwi32_align1(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strwi32_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: strwi32_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 3
  %src = bitcast i8* %x to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %src, align 4
  %dst = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %vec, <4 x i32>* %dst, align 1
  ret i8* %y
}

; <8 x i16> store to %y+3 with align 1: a byte vector store is expected,
; preceded by vrev16.8 on big-endian.
define i8* @strhi16_align1(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strhi16_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: strhi16_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
; CHECK-BE-NEXT: vrev16.8 q0, q0
; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 3
  %src = bitcast i8* %x to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <8 x i16>*
  store <8 x i16> %vec, <8 x i16>* %dst, align 1
  ret i8* %y
}

; Truncating <4 x i32> -> <4 x i16> store to %y+3 with align 1: the expected
; code does the narrowing vstrh.32 into a stack slot and then copies the
; 8 bytes out with two str.w.
define i8* @strhi32_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strhi32_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: mov r1, sp
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: ldrd r1, r2, [sp]
; CHECK-NEXT: str.w r1, [r0, #3]
; CHECK-NEXT: str.w r2, [r0, #7]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 3
  %src = bitcast i8* %x to <4 x i32>*
  %wide = load <4 x i32>, <4 x i32>* %src, align 4
  %dst = bitcast i8* %addr to <4 x i16>*
  %narrow = trunc <4 x i32> %wide to <4 x i16>
  store <4 x i16> %narrow, <4 x i16>* %dst, align 1
  ret i8* %y
}

; <4 x float> store to %y+3 with align 1: same expected code as the
; <4 x i32> align-1 store above.
define i8* @strf32_align1(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strf32_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: strf32_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 3
  %src = bitcast i8* %x to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %src, align 4
  %dst = bitcast i8* %addr to <4 x float>*
  store <4 x float> %vec, <4 x float>* %dst, align 1
  ret i8* %y
}

; <8 x half> store to %y+3 with align 1: same expected code as the
; <8 x i16> align-1 store above.
define i8* @strf16_align1(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strf16_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: strf16_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
; CHECK-BE-NEXT: vrev16.8 q0, q0
; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 3
  %src = bitcast i8* %x to <8 x half>*
  %vec = load <8 x half>, <8 x half>* %src, align 2
  %dst = bitcast i8* %addr to <8 x half>*
  store <8 x half> %vec, <8 x half>* %dst, align 1
  ret i8* %y
}

; 16-bit-element store to %y+16 marked align 8: little-endian is expected to
; use vstrw.32, while big-endian keeps the element-sized vstrh.16.
; NOTE(review): despite the "f16" in the name this test operates on
; <8 x i16>, not <8 x half>; its load counterpart is named ldrh16_align8.
define i8* @strf16_align8(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strf16_align8:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
; CHECK-LE-NEXT: vstrw.32 q0, [r0, #16]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: strf16_align8:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16]
; CHECK-BE-NEXT: bx lr
entry:
  %addr = getelementptr inbounds i8, i8* %y, i32 16
  %src = bitcast i8* %x to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %src, align 2
  %dst = bitcast i8* %addr to <8 x i16>*
  store <8 x i16> %vec, <8 x i16>* %dst, align 8
  ret i8* %y
}