; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE

; <4 x i32> loads from %x plus a constant byte offset. The offset pointer is
; also returned, so the expected assembly shows whether the add was folded
; into a pre-indexed (writeback) vector load.
define i8* @ldrwu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

; Offset 3: the little- and big-endian runs expect different code.
define i8* @ldrwu32_3(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrwu32_3:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT: vstrw.32 q0, [r1]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: ldrwu32_3:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: adds r0, #3
; CHECK-BE-NEXT: vldrw.u32 q0, [r0]
; CHECK-BE-NEXT: vstrw.32 q0, [r1]
; CHECK-BE-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrwu32_m4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #-4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -4
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

; Offset 508 is still folded into the writeback load; offset 512 (next test)
; is expected to need a separate add.
define i8* @ldrwu32_508(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #508]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 508
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrwu32_512(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_512:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r0, r0, #512
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 512
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrwu32_m508(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #-508]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -508
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

; Offset -512 is not folded: a separate sub is expected before the load.
define i8* @ldrwu32_m512(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m512:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: sub.w r0, r0, #512
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -512
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}


; <4 x i16> loads zero-extended to <4 x i32>.
define i8* @ldrhu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r0, #4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhu32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r0, #2]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 2
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

; Offset 254 is still folded into the writeback load; offset 256 (next test)
; is expected to need a separate add.
define i8* @ldrhu32_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r0, #254]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 254
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhu32_256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r0, r0, #256
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 256
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhu32_m254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_m254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r0, #-254]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -254
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

; Offset -256 is not folded: a separate sub is expected before the load.
define i8* @ldrhu32_m256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_m256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: sub.w r0, r0, #256
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -256
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}


; <4 x i16> loads sign-extended to <4 x i32>.
define i8* @ldrhs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q0, [r0, #4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

; Odd offset 3: the expected code uses a separate add rather than writeback.
define i8* @ldrhs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhs32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q0, [r0, #2]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 2
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhs32_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q0, [r0, #254]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 254
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

; Offset 256 is not folded: a separate add is expected before the load.
define i8* @ldrhs32_256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r0, r0, #256
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 256
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrhs32_m254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_m254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q0, [r0, #-254]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -254
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

; Offset -256 is not folded: a separate sub is expected before the load.
define i8* @ldrhs32_m256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_m256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: sub.w r0, r0, #256
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -256
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}


; <8 x i16> loads (no extension).
define i8* @ldrhu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0, #4]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

define i8* @ldrhu16_3(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrhu16_3:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT: vstrh.16 q0, [r1]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: ldrhu16_3:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: adds r0, #3
; CHECK-BE-NEXT: vldrh.u16 q0, [r0]
; CHECK-BE-NEXT: vstrh.16 q0, [r1]
; CHECK-BE-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

; Even offset 2 is folded into the writeback load for both endiannesses.
define i8* @ldrhu16_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0, #2]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 2
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

define i8* @ldrhu16_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0, #254]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 254
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

; Offset 256 is not folded: a separate add is expected before the load.
define i8* @ldrhu16_256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r0, r0, #256
; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 256
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

define i8* @ldrhu16_m254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_m254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0, #-254]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -254
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

define i8* @ldrhu16_m256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_m256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: sub.w r0, r0, #256
; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -256
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}


; <4 x i8> loads zero-extended to <4 x i32>.
define i8* @ldrbu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r0, #4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

; Byte loads: the odd offset 3 is folded into the writeback load.
define i8* @ldrbu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r0, #3]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbu32_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r0, #127]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

; Offset 128 is not folded: a separate add is expected before the load.
define i8* @ldrbu32_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbu32_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_m127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r0, #-127]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbu32_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_m128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: subs r0, #128
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}


; <4 x i8> loads sign-extended to <4 x i32>.
define i8* @ldrbs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s32 q0, [r0, #4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s32 q0, [r0, #3]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

; Offset 127 is still folded into the writeback load; offset 128 (next test)
; is expected to need a separate add.
define i8* @ldrbs32_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s32 q0, [r0, #127]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbs32_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

define i8* @ldrbs32_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_m127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s32 q0, [r0, #-127]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}

; Offset -128 is not folded: a separate sub is expected before the load.
define i8* @ldrbs32_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_m128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: subs r0, #128
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %z
}


; <8 x i8> loads zero-extended to <8 x i16>.
define i8* @ldrbu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r0, #4]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r0, #3]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

; Offset 127 is still folded into the writeback load; offset 128 (next test)
; is expected to need a separate add.
define i8* @ldrbu16_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r0, #127]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbu16_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbu16_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_m127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r0, #-127]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

; Offset -128 is not folded: a separate sub is expected before the load.
define i8* @ldrbu16_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_m128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: subs r0, #128
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}


; <8 x i8> loads sign-extended to <8 x i16>.
define i8* @ldrbs16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s16 q0, [r0, #4]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbs16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s16 q0, [r0, #3]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

; Offset 127 is still folded into the writeback load; offset 128 (next test)
; is expected to need a separate add.
define i8* @ldrbs16_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s16 q0, [r0, #127]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbs16_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

define i8* @ldrbs16_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_m127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s16 q0, [r0, #-127]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}

; Offset -128 is not folded: a separate sub is expected before the load.
define i8* @ldrbs16_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_m128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: subs r0, #128
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %z
}


; <16 x i8> loads (no extension).
define i8* @ldrbu8_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0, #4]!
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}

define i8* @ldrbu8_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}

define i8* @ldrbu8_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0, #127]!
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %z to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}

; Offset 128 is not folded: a separate add is expected before the load.
define i8* @ldrbu8_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #128
; CHECK-NEXT: vldrb.u8 q0, [r0]
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %z to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}

define i8* @ldrbu8_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_m127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0, #-127]!
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %z to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}

define i8* @ldrbu8_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_m128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: subs r0, #128
; CHECK-NEXT: vldrb.u8 q0, [r0]
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %z to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}


; Floating-point vector loads (<4 x float>, then <8 x half>).
define i8* @ldrwf32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwf32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 4
  %2 = bitcast i8* %y to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  ret i8* %z
}

define i8* @ldrwf16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwf16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0, #4]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  %2 = bitcast i8* %y to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 2
  ret i8* %z
}

; Loads with align 1 (below the element size). The little-endian run expects a
; writeback byte load; the big-endian run expects a byte load, a separate add
; and a vrev.
define i8* @ldrwi32_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrwi32_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT: vstrw.32 q0, [r1]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: ldrwi32_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT: adds r0, #3
; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vstrw.32 q0, [r1]
; CHECK-BE-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 1
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrhi16_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrhi16_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT: vstrh.16 q0, [r1]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: ldrhi16_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT: adds r0, #3
; CHECK-BE-NEXT: vrev16.8 q0, q0
; CHECK-BE-NEXT: vstrh.16 q0, [r1]
; CHECK-BE-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 1
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}

; Sign-extending <4 x i16> load with align 1: the expected code copies the
; data to the stack with scalar ldr/str and then does the extending vector
; load from the stack slot.
define i8* @ldrhi32_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhi32_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: ldr r2, [r0, #3]!
; CHECK-NEXT: str r2, [sp]
; CHECK-NEXT: ldr r2, [r0, #4]
; CHECK-NEXT: str r2, [sp, #4]
; CHECK-NEXT: mov r2, sp
; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 1
  %2 = bitcast i8* %y to <4 x i32>*
  %3 = sext <4 x i16> %1 to <4 x i32>
  store <4 x i32> %3, <4 x i32>* %2, align 4
  ret i8* %z
}

define i8* @ldrf32_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrf32_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
1060; CHECK-LE-NEXT: vstrw.32 q0, [r1] 1061; CHECK-LE-NEXT: bx lr 1062; 1063; CHECK-BE-LABEL: ldrf32_align1: 1064; CHECK-BE: @ %bb.0: @ %entry 1065; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] 1066; CHECK-BE-NEXT: adds r0, #3 1067; CHECK-BE-NEXT: vrev32.8 q0, q0 1068; CHECK-BE-NEXT: vstrw.32 q0, [r1] 1069; CHECK-BE-NEXT: bx lr 1070entry: 1071 %z = getelementptr inbounds i8, i8* %x, i32 3 1072 %0 = bitcast i8* %z to <4 x float>* 1073 %1 = load <4 x float>, <4 x float>* %0, align 1 1074 %2 = bitcast i8* %y to <4 x float>* 1075 store <4 x float> %1, <4 x float>* %2, align 4 1076 ret i8* %z 1077} 1078 1079define i8* @ldrf16_align1(i8* %x, i8* %y) { 1080; CHECK-LE-LABEL: ldrf16_align1: 1081; CHECK-LE: @ %bb.0: @ %entry 1082; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! 1083; CHECK-LE-NEXT: vstrh.16 q0, [r1] 1084; CHECK-LE-NEXT: bx lr 1085; 1086; CHECK-BE-LABEL: ldrf16_align1: 1087; CHECK-BE: @ %bb.0: @ %entry 1088; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] 1089; CHECK-BE-NEXT: adds r0, #3 1090; CHECK-BE-NEXT: vrev16.8 q0, q0 1091; CHECK-BE-NEXT: vstrh.16 q0, [r1] 1092; CHECK-BE-NEXT: bx lr 1093entry: 1094 %z = getelementptr inbounds i8, i8* %x, i32 3 1095 %0 = bitcast i8* %z to <8 x half>* 1096 %1 = load <8 x half>, <8 x half>* %0, align 1 1097 %2 = bitcast i8* %y to <8 x half>* 1098 store <8 x half> %1, <8 x half>* %2, align 2 1099 ret i8* %z 1100} 1101 1102define i8* @ldrh16_align8(i8* %x, i8* %y) { 1103; CHECK-LE-LABEL: ldrh16_align8: 1104; CHECK-LE: @ %bb.0: @ %entry 1105; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4]! 1106; CHECK-LE-NEXT: vstrh.16 q0, [r1] 1107; CHECK-LE-NEXT: bx lr 1108; 1109; CHECK-BE-LABEL: ldrh16_align8: 1110; CHECK-BE: @ %bb.0: @ %entry 1111; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4]! 
1112; CHECK-BE-NEXT: vstrh.16 q0, [r1] 1113; CHECK-BE-NEXT: bx lr 1114entry: 1115 %z = getelementptr inbounds i8, i8* %x, i32 4 1116 %0 = bitcast i8* %z to <8 x i16>* 1117 %1 = load <8 x i16>, <8 x i16>* %0, align 8 1118 %2 = bitcast i8* %y to <8 x i16>* 1119 store <8 x i16> %1, <8 x i16>* %2, align 2 1120 ret i8* %z 1121} 1122 1123 1124 1125 1126 1127define i8* @strw32_4(i8* %y, i8* %x) { 1128; CHECK-LE-LABEL: strw32_4: 1129; CHECK-LE: @ %bb.0: @ %entry 1130; CHECK-LE-NEXT: vldrw.u32 q0, [r1] 1131; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! 1132; CHECK-LE-NEXT: bx lr 1133; 1134; CHECK-BE-LABEL: strw32_4: 1135; CHECK-BE: @ %bb.0: @ %entry 1136; CHECK-BE-NEXT: vldrw.u32 q0, [r1] 1137; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]! 1138; CHECK-BE-NEXT: bx lr 1139entry: 1140 %z = getelementptr inbounds i8, i8* %y, i32 4 1141 %0 = bitcast i8* %x to <4 x i32>* 1142 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1143 %2 = bitcast i8* %z to <4 x i32>* 1144 store <4 x i32> %1, <4 x i32>* %2, align 4 1145 ret i8* %z 1146} 1147 1148define i8* @strw32_3(i8* %y, i8* %x) { 1149; CHECK-LE-LABEL: strw32_3: 1150; CHECK-LE: @ %bb.0: @ %entry 1151; CHECK-LE-NEXT: vldrw.u32 q0, [r1] 1152; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! 1153; CHECK-LE-NEXT: bx lr 1154; 1155; CHECK-BE-LABEL: strw32_3: 1156; CHECK-BE: @ %bb.0: @ %entry 1157; CHECK-BE-NEXT: adds r0, #3 1158; CHECK-BE-NEXT: vldrw.u32 q0, [r1] 1159; CHECK-BE-NEXT: vstrw.32 q0, [r0] 1160; CHECK-BE-NEXT: bx lr 1161entry: 1162 %z = getelementptr inbounds i8, i8* %y, i32 3 1163 %0 = bitcast i8* %x to <4 x i32>* 1164 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1165 %2 = bitcast i8* %z to <4 x i32>* 1166 store <4 x i32> %1, <4 x i32>* %2, align 4 1167 ret i8* %z 1168} 1169 1170define i8* @strw32_m4(i8* %y, i8* %x) { 1171; CHECK-LE-LABEL: strw32_m4: 1172; CHECK-LE: @ %bb.0: @ %entry 1173; CHECK-LE-NEXT: vldrw.u32 q0, [r1] 1174; CHECK-LE-NEXT: vstrb.8 q0, [r0, #-4]! 
1175; CHECK-LE-NEXT: bx lr 1176; 1177; CHECK-BE-LABEL: strw32_m4: 1178; CHECK-BE: @ %bb.0: @ %entry 1179; CHECK-BE-NEXT: vldrw.u32 q0, [r1] 1180; CHECK-BE-NEXT: vstrw.32 q0, [r0, #-4]! 1181; CHECK-BE-NEXT: bx lr 1182entry: 1183 %z = getelementptr inbounds i8, i8* %y, i32 -4 1184 %0 = bitcast i8* %x to <4 x i32>* 1185 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1186 %2 = bitcast i8* %z to <4 x i32>* 1187 store <4 x i32> %1, <4 x i32>* %2, align 4 1188 ret i8* %z 1189} 1190 1191define i8* @strw32_508(i8* %y, i8* %x) { 1192; CHECK-LABEL: strw32_508: 1193; CHECK: @ %bb.0: @ %entry 1194; CHECK-NEXT: vldrw.u32 q0, [r1] 1195; CHECK-NEXT: vstrw.32 q0, [r0, #508]! 1196; CHECK-NEXT: bx lr 1197entry: 1198 %z = getelementptr inbounds i8, i8* %y, i32 508 1199 %0 = bitcast i8* %x to <4 x i32>* 1200 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1201 %2 = bitcast i8* %z to <4 x i32>* 1202 store <4 x i32> %1, <4 x i32>* %2, align 4 1203 ret i8* %z 1204} 1205 1206define i8* @strw32_512(i8* %y, i8* %x) { 1207; CHECK-LABEL: strw32_512: 1208; CHECK: @ %bb.0: @ %entry 1209; CHECK-NEXT: add.w r0, r0, #512 1210; CHECK-NEXT: vldrw.u32 q0, [r1] 1211; CHECK-NEXT: vstrw.32 q0, [r0] 1212; CHECK-NEXT: bx lr 1213entry: 1214 %z = getelementptr inbounds i8, i8* %y, i32 512 1215 %0 = bitcast i8* %x to <4 x i32>* 1216 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1217 %2 = bitcast i8* %z to <4 x i32>* 1218 store <4 x i32> %1, <4 x i32>* %2, align 4 1219 ret i8* %z 1220} 1221 1222define i8* @strw32_m508(i8* %y, i8* %x) { 1223; CHECK-LABEL: strw32_m508: 1224; CHECK: @ %bb.0: @ %entry 1225; CHECK-NEXT: vldrw.u32 q0, [r1] 1226; CHECK-NEXT: vstrw.32 q0, [r0, #-508]! 
1227; CHECK-NEXT: bx lr 1228entry: 1229 %z = getelementptr inbounds i8, i8* %y, i32 -508 1230 %0 = bitcast i8* %x to <4 x i32>* 1231 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1232 %2 = bitcast i8* %z to <4 x i32>* 1233 store <4 x i32> %1, <4 x i32>* %2, align 4 1234 ret i8* %z 1235} 1236 1237define i8* @strw32_m512(i8* %y, i8* %x) { 1238; CHECK-LABEL: strw32_m512: 1239; CHECK: @ %bb.0: @ %entry 1240; CHECK-NEXT: sub.w r0, r0, #512 1241; CHECK-NEXT: vldrw.u32 q0, [r1] 1242; CHECK-NEXT: vstrw.32 q0, [r0] 1243; CHECK-NEXT: bx lr 1244entry: 1245 %z = getelementptr inbounds i8, i8* %y, i32 -512 1246 %0 = bitcast i8* %x to <4 x i32>* 1247 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1248 %2 = bitcast i8* %z to <4 x i32>* 1249 store <4 x i32> %1, <4 x i32>* %2, align 4 1250 ret i8* %z 1251} 1252 1253 1254define i8* @strh32_4(i8* %y, i8* %x) { 1255; CHECK-LABEL: strh32_4: 1256; CHECK: @ %bb.0: @ %entry 1257; CHECK-NEXT: vldrh.u32 q0, [r1] 1258; CHECK-NEXT: vstrh.32 q0, [r0, #4]! 1259; CHECK-NEXT: bx lr 1260entry: 1261 %z = getelementptr inbounds i8, i8* %y, i32 4 1262 %0 = bitcast i8* %x to <4 x i16>* 1263 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1264 %2 = bitcast i8* %z to <4 x i16>* 1265 store <4 x i16> %1, <4 x i16>* %2, align 2 1266 ret i8* %z 1267} 1268 1269define i8* @strh32_3(i8* %y, i8* %x) { 1270; CHECK-LABEL: strh32_3: 1271; CHECK: @ %bb.0: @ %entry 1272; CHECK-NEXT: adds r0, #3 1273; CHECK-NEXT: vldrh.u32 q0, [r1] 1274; CHECK-NEXT: vstrh.32 q0, [r0] 1275; CHECK-NEXT: bx lr 1276entry: 1277 %z = getelementptr inbounds i8, i8* %y, i32 3 1278 %0 = bitcast i8* %x to <4 x i16>* 1279 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1280 %2 = bitcast i8* %z to <4 x i16>* 1281 store <4 x i16> %1, <4 x i16>* %2, align 2 1282 ret i8* %z 1283} 1284 1285define i8* @strh32_2(i8* %y, i8* %x) { 1286; CHECK-LABEL: strh32_2: 1287; CHECK: @ %bb.0: @ %entry 1288; CHECK-NEXT: vldrh.u32 q0, [r1] 1289; CHECK-NEXT: vstrh.32 q0, [r0, #2]! 
1290; CHECK-NEXT: bx lr 1291entry: 1292 %z = getelementptr inbounds i8, i8* %y, i32 2 1293 %0 = bitcast i8* %x to <4 x i16>* 1294 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1295 %2 = bitcast i8* %z to <4 x i16>* 1296 store <4 x i16> %1, <4 x i16>* %2, align 2 1297 ret i8* %z 1298} 1299 1300define i8* @strh32_254(i8* %y, i8* %x) { 1301; CHECK-LABEL: strh32_254: 1302; CHECK: @ %bb.0: @ %entry 1303; CHECK-NEXT: vldrh.u32 q0, [r1] 1304; CHECK-NEXT: vstrh.32 q0, [r0, #254]! 1305; CHECK-NEXT: bx lr 1306entry: 1307 %z = getelementptr inbounds i8, i8* %y, i32 254 1308 %0 = bitcast i8* %x to <4 x i16>* 1309 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1310 %2 = bitcast i8* %z to <4 x i16>* 1311 store <4 x i16> %1, <4 x i16>* %2, align 2 1312 ret i8* %z 1313} 1314 1315define i8* @strh32_256(i8* %y, i8* %x) { 1316; CHECK-LABEL: strh32_256: 1317; CHECK: @ %bb.0: @ %entry 1318; CHECK-NEXT: add.w r0, r0, #256 1319; CHECK-NEXT: vldrh.u32 q0, [r1] 1320; CHECK-NEXT: vstrh.32 q0, [r0] 1321; CHECK-NEXT: bx lr 1322entry: 1323 %z = getelementptr inbounds i8, i8* %y, i32 256 1324 %0 = bitcast i8* %x to <4 x i16>* 1325 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1326 %2 = bitcast i8* %z to <4 x i16>* 1327 store <4 x i16> %1, <4 x i16>* %2, align 2 1328 ret i8* %z 1329} 1330 1331define i8* @strh32_m254(i8* %y, i8* %x) { 1332; CHECK-LABEL: strh32_m254: 1333; CHECK: @ %bb.0: @ %entry 1334; CHECK-NEXT: vldrh.u32 q0, [r1] 1335; CHECK-NEXT: vstrh.32 q0, [r0, #-254]! 
1336; CHECK-NEXT: bx lr 1337entry: 1338 %z = getelementptr inbounds i8, i8* %y, i32 -254 1339 %0 = bitcast i8* %x to <4 x i16>* 1340 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1341 %2 = bitcast i8* %z to <4 x i16>* 1342 store <4 x i16> %1, <4 x i16>* %2, align 2 1343 ret i8* %z 1344} 1345 1346define i8* @strh32_m256(i8* %y, i8* %x) { 1347; CHECK-LABEL: strh32_m256: 1348; CHECK: @ %bb.0: @ %entry 1349; CHECK-NEXT: sub.w r0, r0, #256 1350; CHECK-NEXT: vldrh.u32 q0, [r1] 1351; CHECK-NEXT: vstrh.32 q0, [r0] 1352; CHECK-NEXT: bx lr 1353entry: 1354 %z = getelementptr inbounds i8, i8* %y, i32 -256 1355 %0 = bitcast i8* %x to <4 x i16>* 1356 %1 = load <4 x i16>, <4 x i16>* %0, align 2 1357 %2 = bitcast i8* %z to <4 x i16>* 1358 store <4 x i16> %1, <4 x i16>* %2, align 2 1359 ret i8* %z 1360} 1361 1362 1363define i8* @strh16_4(i8* %y, i8* %x) { 1364; CHECK-LE-LABEL: strh16_4: 1365; CHECK-LE: @ %bb.0: @ %entry 1366; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1367; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! 1368; CHECK-LE-NEXT: bx lr 1369; 1370; CHECK-BE-LABEL: strh16_4: 1371; CHECK-BE: @ %bb.0: @ %entry 1372; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1373; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]! 1374; CHECK-BE-NEXT: bx lr 1375entry: 1376 %z = getelementptr inbounds i8, i8* %y, i32 4 1377 %0 = bitcast i8* %x to <8 x i16>* 1378 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1379 %2 = bitcast i8* %z to <8 x i16>* 1380 store <8 x i16> %1, <8 x i16>* %2, align 2 1381 ret i8* %z 1382} 1383 1384define i8* @strh16_3(i8* %y, i8* %x) { 1385; CHECK-LE-LABEL: strh16_3: 1386; CHECK-LE: @ %bb.0: @ %entry 1387; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1388; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! 
1389; CHECK-LE-NEXT: bx lr 1390; 1391; CHECK-BE-LABEL: strh16_3: 1392; CHECK-BE: @ %bb.0: @ %entry 1393; CHECK-BE-NEXT: adds r0, #3 1394; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1395; CHECK-BE-NEXT: vstrh.16 q0, [r0] 1396; CHECK-BE-NEXT: bx lr 1397entry: 1398 %z = getelementptr inbounds i8, i8* %y, i32 3 1399 %0 = bitcast i8* %x to <8 x i16>* 1400 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1401 %2 = bitcast i8* %z to <8 x i16>* 1402 store <8 x i16> %1, <8 x i16>* %2, align 2 1403 ret i8* %z 1404} 1405 1406define i8* @strh16_2(i8* %y, i8* %x) { 1407; CHECK-LE-LABEL: strh16_2: 1408; CHECK-LE: @ %bb.0: @ %entry 1409; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1410; CHECK-LE-NEXT: vstrb.8 q0, [r0, #2]! 1411; CHECK-LE-NEXT: bx lr 1412; 1413; CHECK-BE-LABEL: strh16_2: 1414; CHECK-BE: @ %bb.0: @ %entry 1415; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1416; CHECK-BE-NEXT: vstrh.16 q0, [r0, #2]! 1417; CHECK-BE-NEXT: bx lr 1418entry: 1419 %z = getelementptr inbounds i8, i8* %y, i32 2 1420 %0 = bitcast i8* %x to <8 x i16>* 1421 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1422 %2 = bitcast i8* %z to <8 x i16>* 1423 store <8 x i16> %1, <8 x i16>* %2, align 2 1424 ret i8* %z 1425} 1426 1427define i8* @strh16_254(i8* %y, i8* %x) { 1428; CHECK-LABEL: strh16_254: 1429; CHECK: @ %bb.0: @ %entry 1430; CHECK-NEXT: vldrh.u16 q0, [r1] 1431; CHECK-NEXT: vstrh.16 q0, [r0, #254]! 
1432; CHECK-NEXT: bx lr 1433entry: 1434 %z = getelementptr inbounds i8, i8* %y, i32 254 1435 %0 = bitcast i8* %x to <8 x i16>* 1436 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1437 %2 = bitcast i8* %z to <8 x i16>* 1438 store <8 x i16> %1, <8 x i16>* %2, align 2 1439 ret i8* %z 1440} 1441 1442define i8* @strh16_256(i8* %y, i8* %x) { 1443; CHECK-LABEL: strh16_256: 1444; CHECK: @ %bb.0: @ %entry 1445; CHECK-NEXT: add.w r0, r0, #256 1446; CHECK-NEXT: vldrh.u16 q0, [r1] 1447; CHECK-NEXT: vstrh.16 q0, [r0] 1448; CHECK-NEXT: bx lr 1449entry: 1450 %z = getelementptr inbounds i8, i8* %y, i32 256 1451 %0 = bitcast i8* %x to <8 x i16>* 1452 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1453 %2 = bitcast i8* %z to <8 x i16>* 1454 store <8 x i16> %1, <8 x i16>* %2, align 2 1455 ret i8* %z 1456} 1457 1458define i8* @strh16_m254(i8* %y, i8* %x) { 1459; CHECK-LABEL: strh16_m254: 1460; CHECK: @ %bb.0: @ %entry 1461; CHECK-NEXT: vldrh.u16 q0, [r1] 1462; CHECK-NEXT: vstrh.16 q0, [r0, #-254]! 1463; CHECK-NEXT: bx lr 1464entry: 1465 %z = getelementptr inbounds i8, i8* %y, i32 -254 1466 %0 = bitcast i8* %x to <8 x i16>* 1467 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1468 %2 = bitcast i8* %z to <8 x i16>* 1469 store <8 x i16> %1, <8 x i16>* %2, align 2 1470 ret i8* %z 1471} 1472 1473define i8* @strh16_m256(i8* %y, i8* %x) { 1474; CHECK-LABEL: strh16_m256: 1475; CHECK: @ %bb.0: @ %entry 1476; CHECK-NEXT: sub.w r0, r0, #256 1477; CHECK-NEXT: vldrh.u16 q0, [r1] 1478; CHECK-NEXT: vstrh.16 q0, [r0] 1479; CHECK-NEXT: bx lr 1480entry: 1481 %z = getelementptr inbounds i8, i8* %y, i32 -256 1482 %0 = bitcast i8* %x to <8 x i16>* 1483 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1484 %2 = bitcast i8* %z to <8 x i16>* 1485 store <8 x i16> %1, <8 x i16>* %2, align 2 1486 ret i8* %z 1487} 1488 1489 1490define i8* @strb32_4(i8* %y, i8* %x) { 1491; CHECK-LABEL: strb32_4: 1492; CHECK: @ %bb.0: @ %entry 1493; CHECK-NEXT: vldrb.u32 q0, [r1] 1494; CHECK-NEXT: vstrb.32 q0, [r0, #4]! 
1495; CHECK-NEXT: bx lr 1496entry: 1497 %z = getelementptr inbounds i8, i8* %y, i32 4 1498 %0 = bitcast i8* %x to <4 x i8>* 1499 %1 = load <4 x i8>, <4 x i8>* %0, align 1 1500 %2 = bitcast i8* %z to <4 x i8>* 1501 store <4 x i8> %1, <4 x i8>* %2, align 1 1502 ret i8* %z 1503} 1504 1505define i8* @strb32_3(i8* %y, i8* %x) { 1506; CHECK-LABEL: strb32_3: 1507; CHECK: @ %bb.0: @ %entry 1508; CHECK-NEXT: vldrb.u32 q0, [r1] 1509; CHECK-NEXT: vstrb.32 q0, [r0, #3]! 1510; CHECK-NEXT: bx lr 1511entry: 1512 %z = getelementptr inbounds i8, i8* %y, i32 3 1513 %0 = bitcast i8* %x to <4 x i8>* 1514 %1 = load <4 x i8>, <4 x i8>* %0, align 1 1515 %2 = bitcast i8* %z to <4 x i8>* 1516 store <4 x i8> %1, <4 x i8>* %2, align 1 1517 ret i8* %z 1518} 1519 1520define i8* @strb32_127(i8* %y, i8* %x) { 1521; CHECK-LABEL: strb32_127: 1522; CHECK: @ %bb.0: @ %entry 1523; CHECK-NEXT: vldrb.u32 q0, [r1] 1524; CHECK-NEXT: vstrb.32 q0, [r0, #127]! 1525; CHECK-NEXT: bx lr 1526entry: 1527 %z = getelementptr inbounds i8, i8* %y, i32 127 1528 %0 = bitcast i8* %x to <4 x i8>* 1529 %1 = load <4 x i8>, <4 x i8>* %0, align 1 1530 %2 = bitcast i8* %z to <4 x i8>* 1531 store <4 x i8> %1, <4 x i8>* %2, align 1 1532 ret i8* %z 1533} 1534 1535define i8* @strb32_128(i8* %y, i8* %x) { 1536; CHECK-LABEL: strb32_128: 1537; CHECK: @ %bb.0: @ %entry 1538; CHECK-NEXT: adds r0, #128 1539; CHECK-NEXT: vldrb.u32 q0, [r1] 1540; CHECK-NEXT: vstrb.32 q0, [r0] 1541; CHECK-NEXT: bx lr 1542entry: 1543 %z = getelementptr inbounds i8, i8* %y, i32 128 1544 %0 = bitcast i8* %x to <4 x i8>* 1545 %1 = load <4 x i8>, <4 x i8>* %0, align 1 1546 %2 = bitcast i8* %z to <4 x i8>* 1547 store <4 x i8> %1, <4 x i8>* %2, align 1 1548 ret i8* %z 1549} 1550 1551define i8* @strb32_m127(i8* %y, i8* %x) { 1552; CHECK-LABEL: strb32_m127: 1553; CHECK: @ %bb.0: @ %entry 1554; CHECK-NEXT: vldrb.u32 q0, [r1] 1555; CHECK-NEXT: vstrb.32 q0, [r0, #-127]! 
1556; CHECK-NEXT: bx lr 1557entry: 1558 %z = getelementptr inbounds i8, i8* %y, i32 -127 1559 %0 = bitcast i8* %x to <4 x i8>* 1560 %1 = load <4 x i8>, <4 x i8>* %0, align 1 1561 %2 = bitcast i8* %z to <4 x i8>* 1562 store <4 x i8> %1, <4 x i8>* %2, align 1 1563 ret i8* %z 1564} 1565 1566define i8* @strb32_m128(i8* %y, i8* %x) { 1567; CHECK-LABEL: strb32_m128: 1568; CHECK: @ %bb.0: @ %entry 1569; CHECK-NEXT: subs r0, #128 1570; CHECK-NEXT: vldrb.u32 q0, [r1] 1571; CHECK-NEXT: vstrb.32 q0, [r0] 1572; CHECK-NEXT: bx lr 1573entry: 1574 %z = getelementptr inbounds i8, i8* %y, i32 -128 1575 %0 = bitcast i8* %x to <4 x i8>* 1576 %1 = load <4 x i8>, <4 x i8>* %0, align 1 1577 %2 = bitcast i8* %z to <4 x i8>* 1578 store <4 x i8> %1, <4 x i8>* %2, align 1 1579 ret i8* %z 1580} 1581 1582 1583define i8* @strb16_4(i8* %y, i8* %x) { 1584; CHECK-LABEL: strb16_4: 1585; CHECK: @ %bb.0: @ %entry 1586; CHECK-NEXT: vldrb.u16 q0, [r1] 1587; CHECK-NEXT: vstrb.16 q0, [r0, #4]! 1588; CHECK-NEXT: bx lr 1589entry: 1590 %z = getelementptr inbounds i8, i8* %y, i32 4 1591 %0 = bitcast i8* %x to <8 x i8>* 1592 %1 = load <8 x i8>, <8 x i8>* %0, align 1 1593 %2 = bitcast i8* %z to <8 x i8>* 1594 store <8 x i8> %1, <8 x i8>* %2, align 1 1595 ret i8* %z 1596} 1597 1598define i8* @strb16_3(i8* %y, i8* %x) { 1599; CHECK-LABEL: strb16_3: 1600; CHECK: @ %bb.0: @ %entry 1601; CHECK-NEXT: vldrb.u16 q0, [r1] 1602; CHECK-NEXT: vstrb.16 q0, [r0, #3]! 1603; CHECK-NEXT: bx lr 1604entry: 1605 %z = getelementptr inbounds i8, i8* %y, i32 3 1606 %0 = bitcast i8* %x to <8 x i8>* 1607 %1 = load <8 x i8>, <8 x i8>* %0, align 1 1608 %2 = bitcast i8* %z to <8 x i8>* 1609 store <8 x i8> %1, <8 x i8>* %2, align 1 1610 ret i8* %z 1611} 1612 1613define i8* @strb16_127(i8* %y, i8* %x) { 1614; CHECK-LABEL: strb16_127: 1615; CHECK: @ %bb.0: @ %entry 1616; CHECK-NEXT: vldrb.u16 q0, [r1] 1617; CHECK-NEXT: vstrb.16 q0, [r0, #127]! 
1618; CHECK-NEXT: bx lr 1619entry: 1620 %z = getelementptr inbounds i8, i8* %y, i32 127 1621 %0 = bitcast i8* %x to <8 x i8>* 1622 %1 = load <8 x i8>, <8 x i8>* %0, align 1 1623 %2 = bitcast i8* %z to <8 x i8>* 1624 store <8 x i8> %1, <8 x i8>* %2, align 1 1625 ret i8* %z 1626} 1627 1628define i8* @strb16_128(i8* %y, i8* %x) { 1629; CHECK-LABEL: strb16_128: 1630; CHECK: @ %bb.0: @ %entry 1631; CHECK-NEXT: adds r0, #128 1632; CHECK-NEXT: vldrb.u16 q0, [r1] 1633; CHECK-NEXT: vstrb.16 q0, [r0] 1634; CHECK-NEXT: bx lr 1635entry: 1636 %z = getelementptr inbounds i8, i8* %y, i32 128 1637 %0 = bitcast i8* %x to <8 x i8>* 1638 %1 = load <8 x i8>, <8 x i8>* %0, align 1 1639 %2 = bitcast i8* %z to <8 x i8>* 1640 store <8 x i8> %1, <8 x i8>* %2, align 1 1641 ret i8* %z 1642} 1643 1644define i8* @strb16_m127(i8* %y, i8* %x) { 1645; CHECK-LABEL: strb16_m127: 1646; CHECK: @ %bb.0: @ %entry 1647; CHECK-NEXT: vldrb.u16 q0, [r1] 1648; CHECK-NEXT: vstrb.16 q0, [r0, #-127]! 1649; CHECK-NEXT: bx lr 1650entry: 1651 %z = getelementptr inbounds i8, i8* %y, i32 -127 1652 %0 = bitcast i8* %x to <8 x i8>* 1653 %1 = load <8 x i8>, <8 x i8>* %0, align 1 1654 %2 = bitcast i8* %z to <8 x i8>* 1655 store <8 x i8> %1, <8 x i8>* %2, align 1 1656 ret i8* %z 1657} 1658 1659define i8* @strb16_m128(i8* %y, i8* %x) { 1660; CHECK-LABEL: strb16_m128: 1661; CHECK: @ %bb.0: @ %entry 1662; CHECK-NEXT: subs r0, #128 1663; CHECK-NEXT: vldrb.u16 q0, [r1] 1664; CHECK-NEXT: vstrb.16 q0, [r0] 1665; CHECK-NEXT: bx lr 1666entry: 1667 %z = getelementptr inbounds i8, i8* %y, i32 -128 1668 %0 = bitcast i8* %x to <8 x i8>* 1669 %1 = load <8 x i8>, <8 x i8>* %0, align 1 1670 %2 = bitcast i8* %z to <8 x i8>* 1671 store <8 x i8> %1, <8 x i8>* %2, align 1 1672 ret i8* %z 1673} 1674 1675 1676define i8* @strb8_4(i8* %y, i8* %x) { 1677; CHECK-LABEL: strb8_4: 1678; CHECK: @ %bb.0: @ %entry 1679; CHECK-NEXT: vldrb.u8 q0, [r1] 1680; CHECK-NEXT: vstrb.8 q0, [r0, #4]! 
1681; CHECK-NEXT: bx lr 1682entry: 1683 %z = getelementptr inbounds i8, i8* %y, i32 4 1684 %0 = bitcast i8* %x to <16 x i8>* 1685 %1 = load <16 x i8>, <16 x i8>* %0, align 1 1686 %2 = bitcast i8* %z to <16 x i8>* 1687 store <16 x i8> %1, <16 x i8>* %2, align 1 1688 ret i8* %z 1689} 1690 1691define i8* @strb8_3(i8* %y, i8* %x) { 1692; CHECK-LABEL: strb8_3: 1693; CHECK: @ %bb.0: @ %entry 1694; CHECK-NEXT: vldrb.u8 q0, [r1] 1695; CHECK-NEXT: vstrb.8 q0, [r0, #3]! 1696; CHECK-NEXT: bx lr 1697entry: 1698 %z = getelementptr inbounds i8, i8* %y, i32 3 1699 %0 = bitcast i8* %x to <16 x i8>* 1700 %1 = load <16 x i8>, <16 x i8>* %0, align 1 1701 %2 = bitcast i8* %z to <16 x i8>* 1702 store <16 x i8> %1, <16 x i8>* %2, align 1 1703 ret i8* %z 1704} 1705 1706define i8* @strb8_127(i8* %y, i8* %x) { 1707; CHECK-LABEL: strb8_127: 1708; CHECK: @ %bb.0: @ %entry 1709; CHECK-NEXT: vldrb.u8 q0, [r1] 1710; CHECK-NEXT: vstrb.8 q0, [r0, #127]! 1711; CHECK-NEXT: bx lr 1712entry: 1713 %z = getelementptr inbounds i8, i8* %y, i32 127 1714 %0 = bitcast i8* %x to <16 x i8>* 1715 %1 = load <16 x i8>, <16 x i8>* %0, align 1 1716 %2 = bitcast i8* %z to <16 x i8>* 1717 store <16 x i8> %1, <16 x i8>* %2, align 1 1718 ret i8* %z 1719} 1720 1721define i8* @strb8_128(i8* %y, i8* %x) { 1722; CHECK-LABEL: strb8_128: 1723; CHECK: @ %bb.0: @ %entry 1724; CHECK-NEXT: adds r0, #128 1725; CHECK-NEXT: vldrb.u8 q0, [r1] 1726; CHECK-NEXT: vstrb.8 q0, [r0] 1727; CHECK-NEXT: bx lr 1728entry: 1729 %z = getelementptr inbounds i8, i8* %y, i32 128 1730 %0 = bitcast i8* %x to <16 x i8>* 1731 %1 = load <16 x i8>, <16 x i8>* %0, align 1 1732 %2 = bitcast i8* %z to <16 x i8>* 1733 store <16 x i8> %1, <16 x i8>* %2, align 1 1734 ret i8* %z 1735} 1736 1737define i8* @strb8_m127(i8* %y, i8* %x) { 1738; CHECK-LABEL: strb8_m127: 1739; CHECK: @ %bb.0: @ %entry 1740; CHECK-NEXT: vldrb.u8 q0, [r1] 1741; CHECK-NEXT: vstrb.8 q0, [r0, #-127]! 
1742; CHECK-NEXT: bx lr 1743entry: 1744 %z = getelementptr inbounds i8, i8* %y, i32 -127 1745 %0 = bitcast i8* %x to <16 x i8>* 1746 %1 = load <16 x i8>, <16 x i8>* %0, align 1 1747 %2 = bitcast i8* %z to <16 x i8>* 1748 store <16 x i8> %1, <16 x i8>* %2, align 1 1749 ret i8* %z 1750} 1751 1752define i8* @strb8_m128(i8* %y, i8* %x) { 1753; CHECK-LABEL: strb8_m128: 1754; CHECK: @ %bb.0: @ %entry 1755; CHECK-NEXT: subs r0, #128 1756; CHECK-NEXT: vldrb.u8 q0, [r1] 1757; CHECK-NEXT: vstrb.8 q0, [r0] 1758; CHECK-NEXT: bx lr 1759entry: 1760 %z = getelementptr inbounds i8, i8* %y, i32 -128 1761 %0 = bitcast i8* %x to <16 x i8>* 1762 %1 = load <16 x i8>, <16 x i8>* %0, align 1 1763 %2 = bitcast i8* %z to <16 x i8>* 1764 store <16 x i8> %1, <16 x i8>* %2, align 1 1765 ret i8* %z 1766} 1767 1768 1769define i8* @strf32_4(i8* %y, i8* %x) { 1770; CHECK-LE-LABEL: strf32_4: 1771; CHECK-LE: @ %bb.0: @ %entry 1772; CHECK-LE-NEXT: vldrw.u32 q0, [r1] 1773; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! 1774; CHECK-LE-NEXT: bx lr 1775; 1776; CHECK-BE-LABEL: strf32_4: 1777; CHECK-BE: @ %bb.0: @ %entry 1778; CHECK-BE-NEXT: vldrw.u32 q0, [r1] 1779; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]! 1780; CHECK-BE-NEXT: bx lr 1781entry: 1782 %z = getelementptr inbounds i8, i8* %y, i32 4 1783 %0 = bitcast i8* %x to <4 x float>* 1784 %1 = load <4 x float>, <4 x float>* %0, align 4 1785 %2 = bitcast i8* %z to <4 x float>* 1786 store <4 x float> %1, <4 x float>* %2, align 4 1787 ret i8* %z 1788} 1789 1790define i8* @strf16_4(i8* %y, i8* %x) { 1791; CHECK-LE-LABEL: strf16_4: 1792; CHECK-LE: @ %bb.0: @ %entry 1793; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1794; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! 1795; CHECK-LE-NEXT: bx lr 1796; 1797; CHECK-BE-LABEL: strf16_4: 1798; CHECK-BE: @ %bb.0: @ %entry 1799; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1800; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]! 
1801; CHECK-BE-NEXT: bx lr 1802entry: 1803 %z = getelementptr inbounds i8, i8* %y, i32 4 1804 %0 = bitcast i8* %x to <8 x half>* 1805 %1 = load <8 x half>, <8 x half>* %0, align 2 1806 %2 = bitcast i8* %z to <8 x half>* 1807 store <8 x half> %1, <8 x half>* %2, align 2 1808 ret i8* %z 1809} 1810 1811define i8* @strwi32_align1(i8* %y, i8* %x) { 1812; CHECK-LE-LABEL: strwi32_align1: 1813; CHECK-LE: @ %bb.0: @ %entry 1814; CHECK-LE-NEXT: vldrw.u32 q0, [r1] 1815; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! 1816; CHECK-LE-NEXT: bx lr 1817; 1818; CHECK-BE-LABEL: strwi32_align1: 1819; CHECK-BE: @ %bb.0: @ %entry 1820; CHECK-BE-NEXT: vldrw.u32 q0, [r1] 1821; CHECK-BE-NEXT: vrev32.8 q0, q0 1822; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] 1823; CHECK-BE-NEXT: adds r0, #3 1824; CHECK-BE-NEXT: bx lr 1825entry: 1826 %z = getelementptr inbounds i8, i8* %y, i32 3 1827 %0 = bitcast i8* %x to <4 x i32>* 1828 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1829 %2 = bitcast i8* %z to <4 x i32>* 1830 store <4 x i32> %1, <4 x i32>* %2, align 1 1831 ret i8* %z 1832} 1833 1834define i8* @strhi16_align1(i8* %y, i8* %x) { 1835; CHECK-LE-LABEL: strhi16_align1: 1836; CHECK-LE: @ %bb.0: @ %entry 1837; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1838; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! 
1839; CHECK-LE-NEXT: bx lr 1840; 1841; CHECK-BE-LABEL: strhi16_align1: 1842; CHECK-BE: @ %bb.0: @ %entry 1843; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1844; CHECK-BE-NEXT: vrev16.8 q0, q0 1845; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] 1846; CHECK-BE-NEXT: adds r0, #3 1847; CHECK-BE-NEXT: bx lr 1848entry: 1849 %z = getelementptr inbounds i8, i8* %y, i32 3 1850 %0 = bitcast i8* %x to <8 x i16>* 1851 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1852 %2 = bitcast i8* %z to <8 x i16>* 1853 store <8 x i16> %1, <8 x i16>* %2, align 1 1854 ret i8* %z 1855} 1856 1857define i8* @strhi32_align1(i8* %y, i8* %x) { 1858; CHECK-LABEL: strhi32_align1: 1859; CHECK: @ %bb.0: @ %entry 1860; CHECK-NEXT: .pad #8 1861; CHECK-NEXT: sub sp, #8 1862; CHECK-NEXT: vldrw.u32 q0, [r1] 1863; CHECK-NEXT: mov r1, sp 1864; CHECK-NEXT: vstrh.32 q0, [r1] 1865; CHECK-NEXT: ldrd r1, r2, [sp] 1866; CHECK-NEXT: str r1, [r0, #3]! 1867; CHECK-NEXT: str r2, [r0, #4] 1868; CHECK-NEXT: add sp, #8 1869; CHECK-NEXT: bx lr 1870entry: 1871 %z = getelementptr inbounds i8, i8* %y, i32 3 1872 %0 = bitcast i8* %x to <4 x i32>* 1873 %1 = load <4 x i32>, <4 x i32>* %0, align 4 1874 %2 = bitcast i8* %z to <4 x i16>* 1875 %3 = trunc <4 x i32> %1 to <4 x i16> 1876 store <4 x i16> %3, <4 x i16>* %2, align 1 1877 ret i8* %z 1878} 1879 1880define i8* @strf32_align1(i8* %y, i8* %x) { 1881; CHECK-LE-LABEL: strf32_align1: 1882; CHECK-LE: @ %bb.0: @ %entry 1883; CHECK-LE-NEXT: vldrw.u32 q0, [r1] 1884; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! 
1885; CHECK-LE-NEXT: bx lr 1886; 1887; CHECK-BE-LABEL: strf32_align1: 1888; CHECK-BE: @ %bb.0: @ %entry 1889; CHECK-BE-NEXT: vldrw.u32 q0, [r1] 1890; CHECK-BE-NEXT: vrev32.8 q0, q0 1891; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] 1892; CHECK-BE-NEXT: adds r0, #3 1893; CHECK-BE-NEXT: bx lr 1894entry: 1895 %z = getelementptr inbounds i8, i8* %y, i32 3 1896 %0 = bitcast i8* %x to <4 x float>* 1897 %1 = load <4 x float>, <4 x float>* %0, align 4 1898 %2 = bitcast i8* %z to <4 x float>* 1899 store <4 x float> %1, <4 x float>* %2, align 1 1900 ret i8* %z 1901} 1902 1903define i8* @strf16_align1(i8* %y, i8* %x) { 1904; CHECK-LE-LABEL: strf16_align1: 1905; CHECK-LE: @ %bb.0: @ %entry 1906; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1907; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! 1908; CHECK-LE-NEXT: bx lr 1909; 1910; CHECK-BE-LABEL: strf16_align1: 1911; CHECK-BE: @ %bb.0: @ %entry 1912; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1913; CHECK-BE-NEXT: vrev16.8 q0, q0 1914; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] 1915; CHECK-BE-NEXT: adds r0, #3 1916; CHECK-BE-NEXT: bx lr 1917entry: 1918 %z = getelementptr inbounds i8, i8* %y, i32 3 1919 %0 = bitcast i8* %x to <8 x half>* 1920 %1 = load <8 x half>, <8 x half>* %0, align 2 1921 %2 = bitcast i8* %z to <8 x half>* 1922 store <8 x half> %1, <8 x half>* %2, align 1 1923 ret i8* %z 1924} 1925 1926define i8* @strf16_align8(i8* %y, i8* %x) { 1927; CHECK-LE-LABEL: strf16_align8: 1928; CHECK-LE: @ %bb.0: @ %entry 1929; CHECK-LE-NEXT: vldrh.u16 q0, [r1] 1930; CHECK-LE-NEXT: vstrb.8 q0, [r0, #16]! 1931; CHECK-LE-NEXT: bx lr 1932; 1933; CHECK-BE-LABEL: strf16_align8: 1934; CHECK-BE: @ %bb.0: @ %entry 1935; CHECK-BE-NEXT: vldrh.u16 q0, [r1] 1936; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16]! 1937; CHECK-BE-NEXT: bx lr 1938entry: 1939 %z = getelementptr inbounds i8, i8* %y, i32 16 1940 %0 = bitcast i8* %x to <8 x i16>* 1941 %1 = load <8 x i16>, <8 x i16>* %0, align 2 1942 %2 = bitcast i8* %z to <8 x i16>* 1943 store <8 x i16> %1, <8 x i16>* %2, align 8 1944 ret i8* %z 1945} 1946