; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Checks x86 code generation for the llvm.bswap intrinsics and for
; shift/mask/or sequences that resemble a byte swap.
; NOTE(review): this header previously read "bswap should be constant folded
; when it is passed a constant argument", but none of the functions below
; passes a constant to bswap.

; RUN: llc < %s -mtriple=i686-- -mcpu=i686 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=CHECK64

declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)

; i16 byte swap: both targets are expected to use a 16-bit rotate by 8 (rolw).
define i16 @W(i16 %A) {
; CHECK-LABEL: W:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: rolw $8, %ax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: W:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl %edi, %eax
; CHECK64-NEXT: rolw $8, %ax
; CHECK64-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK64-NEXT: retq
  %Z = call i16 @llvm.bswap.i16( i16 %A ) ; <i16> [#uses=1]
  ret i16 %Z
}

; i32 byte swap: both targets are expected to use a single bswapl.
define dso_local i32 @X(i32 %A) {
; CHECK-LABEL: X:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: X:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl %edi, %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq
  %Z = call i32 @llvm.bswap.i32( i32 %A ) ; <i32> [#uses=1]
  ret i32 %Z
}

; i64 byte swap: i686 is expected to use one bswapl on each of %eax and %edx,
; x86_64 a single bswapq.
define i64 @Y(i64 %A) {
; CHECK-LABEL: Y:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: Y:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movq %rdi, %rax
; CHECK64-NEXT: bswapq %rax
; CHECK64-NEXT: retq
  %Z = call i64 @llvm.bswap.i64( i64 %A ) ; <i64> [#uses=1]
  ret i64 %Z
}

; This isn't really a bswap test, but the potential problem is
; easier to see with bswap vs. other ops. The transform in
; question starts with a bitwise logic op and tries to hoist
; those ahead of other ops. But that's not generally profitable
; when the other ops have other uses (and it might not be safe
; either due to unconstrained instruction count growth).
; Here both bswap results are stored as well as or'ed together, and the
; expected output keeps two bswapl instructions followed by one orl.

define dso_local i32 @bswap_multiuse(i32 %x, i32 %y, i32* %p1, i32* %p2) nounwind {
; CHECK-LABEL: bswap_multiuse:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: bswapl %esi
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: movl %esi, (%edx)
; CHECK-NEXT: movl %eax, (%ecx)
; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: bswap_multiuse:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: bswapl %edi
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: movl %edi, (%rdx)
; CHECK64-NEXT: movl %eax, (%rcx)
; CHECK64-NEXT: orl %edi, %eax
; CHECK64-NEXT: retq
  %xt = call i32 @llvm.bswap.i32(i32 %x)
  %yt = call i32 @llvm.bswap.i32(i32 %y)
  store i32 %xt, i32* %p1
  store i32 %yt, i32* %p2
  %r = or i32 %xt, %yt
  ret i32 %r
}

; rdar://9164521
; Shift/mask/or form of swapping the low two bytes of an i32; expected to
; become bswapl followed by a logical shift right by 16.
define dso_local i32 @test1(i32 %a) nounwind readnone {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: shrl $16, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: test1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl %edi, %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: shrl $16, %eax
; CHECK64-NEXT: retq
  %and = lshr i32 %a, 8
  %shr3 = and i32 %and, 255
  %and2 = shl i32 %a, 8
  %shl = and i32 %and2, 65280
  %or = or i32 %shr3, %shl
  ret i32 %or
}

; Same two-byte swap, but the result is sign-extended from 16 bits
; (shl 16 / ashr exact 16); expected to become bswapl followed by an
; arithmetic shift right by 16.
define dso_local i32 @test2(i32 %a) nounwind readnone {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: sarl $16, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: test2:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl %edi, %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: sarl $16, %eax
; CHECK64-NEXT: retq
  %and = lshr i32 %a, 8
  %shr4 = and i32 %and, 255
  %and2 = shl i32 %a, 8
  %or = or i32 %shr4, %and2
  %sext = shl i32 %or, 16
  %conv3 = ashr exact i32 %sext, 16
  ret i32 %conv3
}

; Globals loaded by the zero-extension tests below.
@var8 = dso_local global i8 0
@var16 = dso_local global i16 0

; The "shl" below can move bits into the high parts of the value, so the
; operation is not a "bswap, shr" pair.

; rdar://problem/14814049
define i64 @not_bswap() {
; CHECK-LABEL: not_bswap:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl var16, %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrl $8, %ecx
; CHECK-NEXT: shll $8, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: not_bswap:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzwl var16(%rip), %eax
; CHECK64-NEXT: movq %rax, %rcx
; CHECK64-NEXT: shrq $8, %rcx
; CHECK64-NEXT: shlq $8, %rax
; CHECK64-NEXT: orq %rcx, %rax
; CHECK64-NEXT: retq
  %init = load i16, i16* @var16
  %big = zext i16 %init to i64

  %hishifted = lshr i64 %big, 8
  %loshifted = shl i64 %big, 8

  %notswapped = or i64 %hishifted, %loshifted

  ret i64 %notswapped
}

; This time, the lshr (and subsequent or) is completely useless. While it's
; technically correct to convert this into a "bswap, shr", it's suboptimal. A
; simple shl works better.
; The input is zero-extended from i8, so shifting it right by 8 yields zero;
; the expected output is just the zero-extending load and a shift left by 8.
define i64 @not_useful_bswap() {
; CHECK-LABEL: not_useful_bswap:
; CHECK: # %bb.0:
; CHECK-NEXT: movzbl var8, %eax
; CHECK-NEXT: shll $8, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: not_useful_bswap:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl var8(%rip), %eax
; CHECK64-NEXT: shlq $8, %rax
; CHECK64-NEXT: retq
  %init = load i8, i8* @var8
  %big = zext i8 %init to i64

  %hishifted = lshr i64 %big, 8
  %loshifted = shl i64 %big, 8

  %notswapped = or i64 %hishifted, %loshifted

  ret i64 %notswapped
}

; Finally, it *is* OK to just mask off the shl if we know that the value is zero
; beyond 16 bits anyway. This is a legitimate bswap.
; The "and 255" keeps only the low byte before the shl, so nothing is shifted
; above bit 15. Expected output: bswapl + shrl by 16 on i686, bswapq + shrq by
; 48 on x86_64.

define i64 @finally_useful_bswap() {
; CHECK-LABEL: finally_useful_bswap:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl var16, %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: shrl $16, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: finally_useful_bswap:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzwl var16(%rip), %eax
; CHECK64-NEXT: bswapq %rax
; CHECK64-NEXT: shrq $48, %rax
; CHECK64-NEXT: retq
  %init = load i16, i16* @var16
  %big = zext i16 %init to i64

  %hishifted = lshr i64 %big, 8
  %lomasked = and i64 %big, 255
  %loshifted = shl i64 %lomasked, 8

  %swapped = or i64 %hishifted, %loshifted

  ret i64 %swapped
}

; Make sure we don't assert during type legalization promoting a large
; bswap due to the need for a large shift that won't fit in the i8 returned
; from getShiftAmountTy.
; NOTE(review): the expected output byte-swaps each register and then chains
; double-precision right shifts (by 16 on i686, by 48 on x86_64), ending with
; a 16-bit store at offset 64. Presumably the i528 bswap is performed in a
; wider promoted type and shifted back down -- confirm against the legalizer.
define i528 @large_promotion(i528 %A) nounwind {
; CHECK-LABEL: large_promotion:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl $44, %esp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: shrdl $16, %edx, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %esi
; CHECK-NEXT: shrdl $16, %esi, %edx
; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %edi
; CHECK-NEXT: shrdl $16, %edi, %esi
; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %ebx
; CHECK-NEXT: shrdl $16, %ebx, %edi
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %ebp
; CHECK-NEXT: shrdl $16, %ebp, %ebx
; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %ebp
; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: shrdl $16, %eax, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: shrdl $16, %eax, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: bswapl %ebp
; CHECK-NEXT: shrdl $16, %ebp, %eax
; CHECK-NEXT: movl %eax, (%esp) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: bswapl %ebx
; CHECK-NEXT: shrdl $16, %ebx, %ebp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: bswapl %esi
; CHECK-NEXT: shrdl $16, %esi, %ebx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: shrdl $16, %edx, %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: bswapl %edi
; CHECK-NEXT: shrdl $16, %edi, %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %ecx, 60(%eax)
; CHECK-NEXT: movl %edx, 56(%eax)
; CHECK-NEXT: movl %esi, 52(%eax)
; CHECK-NEXT: movl %ebx, 48(%eax)
; CHECK-NEXT: movl %ebp, 44(%eax)
; CHECK-NEXT: movl (%esp), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 40(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 36(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 32(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 28(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 24(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 20(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 16(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 12(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 8(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 4(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, (%eax)
; CHECK-NEXT: shrl $16, %edi
; CHECK-NEXT: movw %di, 64(%eax)
; CHECK-NEXT: addl $44, %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl $4
;
; CHECK64-LABEL: large_promotion:
; CHECK64: # %bb.0:
; CHECK64-NEXT: pushq %rbx
; CHECK64-NEXT: movq %rdi, %rax
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; CHECK64-NEXT: bswapq %r10
; CHECK64-NEXT: bswapq %rdi
; CHECK64-NEXT: shrdq $48, %rdi, %r10
; CHECK64-NEXT: bswapq %r11
; CHECK64-NEXT: shrdq $48, %r11, %rdi
; CHECK64-NEXT: bswapq %rbx
; CHECK64-NEXT: shrdq $48, %rbx, %r11
; CHECK64-NEXT: bswapq %r9
; CHECK64-NEXT: shrdq $48, %r9, %rbx
; CHECK64-NEXT: bswapq %r8
; CHECK64-NEXT: shrdq $48, %r8, %r9
; CHECK64-NEXT: bswapq %rcx
; CHECK64-NEXT: shrdq $48, %rcx, %r8
; CHECK64-NEXT: bswapq %rdx
; CHECK64-NEXT: shrdq $48, %rdx, %rcx
; CHECK64-NEXT: bswapq %rsi
; CHECK64-NEXT: shrdq $48, %rsi, %rdx
; CHECK64-NEXT: shrq $48, %rsi
; CHECK64-NEXT: movq %rdx, 56(%rax)
; CHECK64-NEXT: movq %rcx, 48(%rax)
; CHECK64-NEXT: movq %r8, 40(%rax)
; CHECK64-NEXT: movq %r9, 32(%rax)
; CHECK64-NEXT: movq %rbx, 24(%rax)
; CHECK64-NEXT: movq %r11, 16(%rax)
; CHECK64-NEXT: movq %rdi, 8(%rax)
; CHECK64-NEXT: movq %r10, (%rax)
; CHECK64-NEXT: movw %si, 64(%rax)
; CHECK64-NEXT: popq %rbx
; CHECK64-NEXT: retq
  %Z = call i528 @llvm.bswap.i528(i528 %A)
  ret i528 %Z
}
; i528 overload of the bswap intrinsic called by @large_promotion above.
declare i528 @llvm.bswap.i528(i528)