; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;
; Strict-FP (constrained) scalar rounding lowering tests for x86.
;
; Verifies that the llvm.experimental.constrained.{ceil,floor,trunc,rint,
; nearbyint,roundeven} intrinsics lower to a single ROUNDSS/ROUNDSD
; (VROUNDSS/VROUNDSD under AVX/AVX512F) with the expected imm8, and that
; constrained round — round-half-away-from-zero, which has no ROUNDSS imm8
; encoding — falls back to the roundf/round libcalls.
;
; ROUNDSS/ROUNDSD imm8 encoding (Intel SDM): bits[1:0] select the rounding
; mode (00 = nearest-even, 01 = down, 10 = up, 11 = toward zero), bit 2 set
; means "use MXCSR.RC instead of bits[1:0]", and bit 3 set suppresses the
; precision (inexact) exception.
;
; On i686, float/double are returned in x87 st(0): the value is stored to the
; stack and reloaded with flds/fldl, and a wait is emitted so pending FP
; exceptions are reported before returning (strictfp semantics). The f64
; i686 variants additionally realign the stack (andl $-8) for the 8-byte
; spill slot.
;
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefix=SSE41-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefix=SSE41-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefix=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefix=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefix=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefix=AVX-X64

; Declarations of the constrained FP rounding intrinsics under test.
declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
declare float @llvm.experimental.constrained.floor.f32(float, metadata)
declare double @llvm.experimental.constrained.floor.f64(double, metadata)
declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.round.f32(float, metadata)
declare double @llvm.experimental.constrained.round.f64(double, metadata)
declare float @llvm.experimental.constrained.roundeven.f32(float, metadata)
declare double @llvm.experimental.constrained.roundeven.f64(double, metadata)

; ceil f32 -> roundss $10 (imm 1010b: round up, inexact suppressed).
define float @fceil32(float %f) #0 {
; SSE41-X86-LABEL: fceil32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $10, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fceil32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $10, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fceil32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fceil32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
  %res = call float @llvm.experimental.constrained.ceil.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

; ceil f64 -> roundsd $10 (imm 1010b: round up, inexact suppressed).
define double @fceilf64(double %f) #0 {
; SSE41-X86-LABEL: fceilf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $10, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fceilf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $10, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fceilf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fceilf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
  %res = call double @llvm.experimental.constrained.ceil.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

; floor f32 -> roundss $9 (imm 1001b: round down, inexact suppressed).
define float @ffloor32(float %f) #0 {
; SSE41-X86-LABEL: ffloor32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $9, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: ffloor32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $9, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: ffloor32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: ffloor32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
  %res = call float @llvm.experimental.constrained.floor.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

; floor f64 -> roundsd $9 (imm 1001b: round down, inexact suppressed).
define double @ffloorf64(double %f) #0 {
; SSE41-X86-LABEL: ffloorf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $9, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: ffloorf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $9, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: ffloorf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: ffloorf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
  %res = call double @llvm.experimental.constrained.floor.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

; trunc f32 -> roundss $11 (imm 1011b: round toward zero, inexact suppressed).
define float @ftrunc32(float %f) #0 {
; SSE41-X86-LABEL: ftrunc32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $11, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: ftrunc32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $11, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: ftrunc32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: ftrunc32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
  %res = call float @llvm.experimental.constrained.trunc.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

; trunc f64 -> roundsd $11 (imm 1011b: round toward zero, inexact suppressed).
define double @ftruncf64(double %f) #0 {
; SSE41-X86-LABEL: ftruncf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $11, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: ftruncf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $11, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: ftruncf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: ftruncf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
  %res = call double @llvm.experimental.constrained.trunc.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

; rint f32 -> roundss $4 (imm 0100b: use MXCSR rounding mode; bit 3 clear so
; the inexact exception may be raised, matching rint semantics).
define float @frint32(float %f) #0 {
; SSE41-X86-LABEL: frint32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $4, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: frint32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $4, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: frint32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: frint32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
  %res = call float @llvm.experimental.constrained.rint.f32(
                        float %f,
                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret float %res
}

; rint f64 -> roundsd $4 (imm 0100b: MXCSR rounding mode, inexact may be raised).
define double @frintf64(double %f) #0 {
; SSE41-X86-LABEL: frintf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $4, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: frintf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $4, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: frintf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: frintf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
  %res = call double @llvm.experimental.constrained.rint.f64(
                        double %f,
                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret double %res
}

; nearbyint f32 -> roundss $12 (imm 1100b: MXCSR rounding mode, inexact
; suppressed — the difference from rint).
define float @fnearbyint32(float %f) #0 {
; SSE41-X86-LABEL: fnearbyint32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $12, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fnearbyint32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $12, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fnearbyint32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fnearbyint32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
  %res = call float @llvm.experimental.constrained.nearbyint.f32(
                        float %f,
                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret float %res
}

; nearbyint f64 -> roundsd $12 (imm 1100b: MXCSR rounding mode, inexact suppressed).
define double @fnearbyintf64(double %f) #0 {
; SSE41-X86-LABEL: fnearbyintf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $12, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fnearbyintf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $12, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fnearbyintf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fnearbyintf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
  %res = call double @llvm.experimental.constrained.nearbyint.f64(
                        double %f,
                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret double %res
}

; round f32 (round half away from zero) has no ROUNDSS imm8 encoding, so it
; lowers to a call to the roundf libcall.
define float @fround32(float %f) #0 {
; SSE41-X86-LABEL: fround32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: calll roundf
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: fround32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: pushq %rax
; SSE41-X64-NEXT: .cfi_def_cfa_offset 16
; SSE41-X64-NEXT: callq roundf
; SSE41-X64-NEXT: popq %rax
; SSE41-X64-NEXT: .cfi_def_cfa_offset 8
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: fround32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: calll roundf
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fround32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: pushq %rax
; AVX-X64-NEXT: .cfi_def_cfa_offset 16
; AVX-X64-NEXT: callq roundf
; AVX-X64-NEXT: popq %rax
; AVX-X64-NEXT: .cfi_def_cfa_offset 8
; AVX-X64-NEXT: retq
  %res = call float @llvm.experimental.constrained.round.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

; round f64 -> round libcall (no ROUNDSD imm8 encoding for half-away-from-zero).
define double @froundf64(double %f) #0 {
; SSE41-X86-LABEL: froundf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 12
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: calll round
; SSE41-X86-NEXT: addl $8, %esp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: froundf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: pushq %rax
; SSE41-X64-NEXT: .cfi_def_cfa_offset 16
; SSE41-X64-NEXT: callq round
; SSE41-X64-NEXT: popq %rax
; SSE41-X64-NEXT: .cfi_def_cfa_offset 8
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: froundf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: .cfi_def_cfa_offset 12
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: calll round
; AVX-X86-NEXT: addl $8, %esp
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: froundf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: pushq %rax
; AVX-X64-NEXT: .cfi_def_cfa_offset 16
; AVX-X64-NEXT: callq round
; AVX-X64-NEXT: popq %rax
; AVX-X64-NEXT: .cfi_def_cfa_offset 8
; AVX-X64-NEXT: retq
  %res = call double @llvm.experimental.constrained.round.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

; roundeven f32 -> roundss $8 (imm 1000b: round to nearest even, inexact suppressed).
define float @froundeven32(float %f) #0 {
; SSE41-X86-LABEL: froundeven32:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT: roundss $8, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: froundeven32:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundss $8, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: froundeven32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vroundss $8, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: froundeven32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundss $8, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
  %res = call float @llvm.experimental.constrained.roundeven.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

; roundeven f64 -> roundsd $8 (imm 1000b: round to nearest even, inexact suppressed).
define double @froundevenf64(double %f) #0 {
; SSE41-X86-LABEL: froundevenf64:
; SSE41-X86: # %bb.0:
; SSE41-X86-NEXT: pushl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
; SSE41-X86-NEXT: .cfi_offset %ebp, -8
; SSE41-X86-NEXT: movl %esp, %ebp
; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT: andl $-8, %esp
; SSE41-X86-NEXT: subl $8, %esp
; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT: roundsd $8, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT: retl
;
; SSE41-X64-LABEL: froundevenf64:
; SSE41-X64: # %bb.0:
; SSE41-X64-NEXT: roundsd $8, %xmm0, %xmm0
; SSE41-X64-NEXT: retq
;
; AVX-X86-LABEL: froundevenf64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: .cfi_offset %ebp, -8
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vroundsd $8, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: froundevenf64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vroundsd $8, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT: retq
  %res = call double @llvm.experimental.constrained.roundeven.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

; strictfp: all FP operations in these functions must honor dynamic rounding
; mode and FP exception semantics.
attributes #0 = { strictfp }