module &__llvm_hsail_module:1:0:$full:$large:$near;

//----------------------------------------------------------------------
// kernel &mmul2d(p0, p1, p2, p3)
//
// One work-item computes one f64 result.  With
//     x = absolute work-item id, dimension 0
//     y = absolute work-item id, dimension 1
//     N = grid size, dimension 0
//     K = p3 (compared and counted as a signed 64-bit value)
// and all indices in units of 8-byte f64 elements, it stores
//     p2[y*N + x] = sum over k in [0, K) of  p0[x + k*N] * p1[y*K + k]
// When K < 1 the loop is skipped and 0.0 is stored.
//
// NOTE(review): the name suggests a matrix product with column-major
// operands (p0 = left, p1 = right, p2 = result) -- confirm against the
// host-side launch code.
//
// Register roles:
//     $d0 = x            $d1 = y            $d2 = N
//     $d3 = cursor into p0                  $d4 = cursor into p1
//     $d5 = p2 base      $d6 = iterations remaining
//     $d7 = accumulator  $d8 = scratch, then p0 stride in bytes (8*N)
//     $d9, $d10 = loaded operands / product
//----------------------------------------------------------------------
prog kernel &mmul2d(
	kernarg_u64 %__arg_p0,
	kernarg_u64 %__arg_p1,
	kernarg_u64 %__arg_p2,
	kernarg_u64 %__arg_p3)
{
	pragma "AMD RTI", "ARGSTART:mmul2d";
	pragma "AMD RTI", "version:3:1:104";
	pragma "AMD RTI", "device:generic";
	pragma "AMD RTI", "uniqueid:1025";
	pragma "AMD RTI", "function:1:0";
	pragma "AMD RTI", "memory:64bitABI";
	pragma "AMD RTI", "privateid:1";
	pragma "AMD RTI", "ARGEND:mmul2d";

	// ---- entry: work-item coordinates, widened to 64 bits ----
	workitemabsid_u32	$s0, 0;
	workitemabsid_u32	$s1, 1;
	gridsize_u32	$s2, 0;
	cvt_u64_u32	$d0, $s0;			// x
	cvt_u64_u32	$d1, $s1;			// y
	cvt_u64_u32	$d2, $s2;			// N

	// ---- entry: kernel arguments ----
	ld_kernarg_align(8)_width(all)_u64	$d3, [%__arg_p0];
	ld_kernarg_align(8)_width(all)_u64	$d4, [%__arg_p1];
	ld_kernarg_align(8)_width(all)_u64	$d5, [%__arg_p2];
	ld_kernarg_align(8)_width(all)_u64	$d6, [%__arg_p3];

	mov_f64	$d7, 0D0000000000000000;	// accumulator = +0.0
	cmp_lt_b1_s64	$c0, $d6, 1;		// signed: K < 1 ?
	cbr_b1	$c0, @store_result;		// no iterations: store 0.0

	// ---- loop setup: starting addresses and stride ----
	mul_u64	$d8, $d6, $d1;			// K * y
	shl_u64	$d8, $d8, 3;			// ... in bytes
	add_u64	$d4, $d4, $d8;			// p1 cursor = p1 + 8*K*y
	shl_u64	$d8, $d0, 3;			// x in bytes
	add_u64	$d3, $d3, $d8;			// p0 cursor = p0 + 8*x
	shl_u64	$d8, $d2, 3;			// p0 stride = 8*N bytes

@dot_loop:
	// Invariant: $d6 > 0 iterations remain; $d7 holds the partial sum.
	ld_global_f64	$d9, [$d3];
	ld_global_f64	$d10, [$d4];
	mul_f64	$d9, $d9, $d10;			// product rounded on its own,
	add_f64	$d7, $d7, $d9;			// then added (two operations, not fused)
	add_u64	$d3, $d3, $d8;			// p0 cursor += 8*N
	add_u64	$d4, $d4, 8;			// p1 cursor += one element
	sub_u64	$d6, $d6, 1;
	cmp_ne_b1_s64	$c0, $d6, 0;
	cbr_b1	$c0, @dot_loop;

@store_result:
	mul_u64	$d1, $d1, $d2;			// y * N
	add_u64	$d0, $d1, $d0;			// y*N + x
	shl_u64	$d0, $d0, 3;			// ... in bytes
	add_u64	$d0, $d5, $d0;			// &p2[y*N + x]
	st_global_f64	$d7, [$d0];
	ret;
};