////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_AMD_GPU_SHADERS_H_
#define HSA_RUNTIME_CORE_INC_AMD_GPU_SHADERS_H_

// Pre-assembled GPU shader programs stored as tables of 32-bit words.
//
// The words are opaque machine code: their order and values must never be
// edited by hand. NOTE(review): the numeric suffix on each table name (7, 8,
// 9) presumably selects the target ISA generation -- confirm against the code
// that picks a table.
//
// Every table is `static const`, so each translation unit that includes this
// header gets its own private copy.

namespace amd {

// Machine code for the "copy aligned" shader, suffix-7 variant.
// NOTE(review): purpose inferred from the identifier only.
static const unsigned int kCodeCopyAligned7[] = {
    0xC0820100, 0xC0840104, 0xC0860108, 0xC088010C, 0xC08A0110, 0xC00C0114,
    0xBF8C007F, 0x8F028602, 0x4A000002, 0x7E060205, 0xD24A6A02, 0x00000900,
    0xD2506A03, 0x01A90103, 0x7E0A0207, 0xD24A6A04, 0x00000D00, 0xD2506A05,
    0x01A90105, 0xD1C2006A, 0x00001102, 0xBF86000F, 0x87FE6A7E, 0xDC200000,
    0x01000002, 0xBF8C0F70, 0xD24A6A02, 0x00003102, 0xD2506A03, 0x01A90103,
    0xDC600000, 0x00000104, 0xD24A6A04, 0x00003104, 0xD2506A05, 0x01A90105,
    0xBF82FFEE, 0xBEFE04C1, 0x8F198418, 0x34020084, 0x7E060209, 0xD24A6A02,
    0x00001101, 0xD2506A03, 0x01A90103, 0x7E0A020B, 0xD24A6A04, 0x00001501,
    0xD2506A05, 0x01A90105, 0xD1C2006A, 0x00001902, 0xBF86000E, 0xDC380000,
    0x08000002, 0xD24A6A02, 0x00003302, 0xD2506A03, 0x01A90103, 0xBF8C0F70,
    0xDC780000, 0x00000804, 0xD24A6A04, 0x00003304, 0xD2506A05, 0x01A90105,
    0xBF82FFEF, 0x8F198218, 0x34020082, 0x7E06020D, 0xD24A6A02, 0x00001901,
    0xD2506A03, 0x01A90103, 0x7E0A020F, 0xD24A6A04, 0x00001D01, 0xD2506A05,
    0x01A90105, 0xD1C2006A, 0x00002102, 0xBF86000F, 0x87FE6A7E, 0xDC300000,
    0x01000002, 0xD24A6A02, 0x00003302, 0xD2506A03, 0x01A90103, 0xBF8C0F70,
    0xDC700000, 0x00000104, 0xD24A6A04, 0x00003304, 0xD2506A05, 0x01A90105,
    0xBF82FFEE, 0xBEFE04C1, 0x7E060211, 0xD24A6A02, 0x00002100, 0xD2506A03,
    0x01A90103, 0x7E0A0213, 0xD24A6A04, 0x00002500, 0xD2506A05, 0x01A90105,
    0xD1C2006A, 0x00002902, 0xBF860006, 0x87FE6A7E, 0xDC200000, 0x01000002,
    0xBF8C0F70, 0xDC600000, 0x00000104, 0xBF810000,
};

// Machine code for the "copy misaligned" shader, suffix-7 variant.
// NOTE(review): purpose inferred from the identifier only.
static const unsigned int kCodeCopyMisaligned7[] = {
    0xC0820100, 0xC0840104, 0xC0860108, 0xC008010C, 0xBF8C007F, 0x8F028602,
    0x4A000002, 0x7E060205, 0xD24A6A02, 0x00000900, 0xD2506A03, 0x01A90103,
    0x7E0A0207, 0xD24A6A04, 0x00000D00, 0xD2506A05, 0x01A90105, 0xD1C2006A,
    0x00001102, 0xBF860032, 0xDC200000, 0x06000002, 0xD24A6A02, 0x00002102,
    0xD2506A03, 0x01A90103, 0xDC200000, 0x07000002, 0xD24A6A02, 0x00002102,
    0xD2506A03, 0x01A90103, 0xDC200000, 0x08000002, 0xD24A6A02, 0x00002102,
    0xD2506A03, 0x01A90103, 0xDC200000, 0x09000002, 0xD24A6A02, 0x00002102,
    0xD2506A03, 0x01A90103, 0xBF8C0F70, 0xDC600000, 0x00000604, 0xD24A6A04,
    0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000704, 0xD24A6A04,
    0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000804, 0xD24A6A04,
    0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000904, 0xD24A6A04,
    0x00002104, 0xD2506A05, 0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD24A6A02,
    0x00001100, 0xD2506A03, 0x01A90103, 0x7E0A020B, 0xD24A6A04, 0x00001500,
    0xD2506A05, 0x01A90105, 0xD1C2006A, 0x00001902, 0xBF86000F, 0x87FE6A7E,
    0xDC200000, 0x01000002, 0xD24A6A02, 0x00002102, 0xD2506A03, 0x01A90103,
    0xBF8C0F70, 0xDC600000, 0x00000104, 0xD24A6A04, 0x00002104, 0xD2506A05,
    0x01A90105, 0xBF82FFEE, 0xBF810000,
};

// Machine code for the "fill" shader, suffix-7 variant.
// NOTE(review): purpose inferred from the identifier only.
static const unsigned int kCodeFill7[] = {
    0xC0820100, 0xC0840104, 0xBF8C007F, 0x8F028602, 0x4A000002, 0x7E08020A,
    0x7E0A020A, 0x7E0C020A, 0x7E0E020A, 0x8F0C840B, 0x34020084, 0x7E060205,
    0xD24A6A02, 0x00000901, 0xD2506A03, 0x01A90103, 0xD1C2006A, 0x00000D02,
    0xBF860007, 0xDC780000, 0x00000402, 0xD24A6A02, 0x00001902, 0xD2506A03,
    0x01A90103, 0xBF82FFF6, 0x8F0C820B, 0x34020082, 0x7E060207, 0xD24A6A02,
    0x00000D01, 0xD2506A03, 0x01A90103, 0xD1C2006A, 0x00001102, 0xBF860008,
    0x87FE6A7E, 0xDC700000, 0x00000402, 0xD24A6A02, 0x00001902, 0xD2506A03,
    0x01A90103, 0xBF82FFF5, 0xBF810000,
};

// Machine code for the trap handler, suffix-8 variant.
// NOTE(review): no assembly listing accompanies this table; purpose inferred
// from the identifier only.
static const unsigned int kCodeTrapHandler8[] = {
    0xC0061C80, 0x000000C0, 0xBF8C007F, 0xBEFE0181, 0x80728872, 0x82738073,
    0x7E000272, 0x7E020273, 0x7E0402FF, 0x80000000, 0x7E060280, 0xDD800000,
    0x00000200, 0xBF8C0F70, 0x7DD40500, 0xBF870011, 0xC0061D39, 0x00000008,
    0xBF8C007F, 0x86F47474, 0xBF84000C, 0x80729072, 0x82738073, 0xC0021CB9,
    0x00000000, 0xBF8C007F, 0x7E000274, 0x7E020275, 0x7E040272, 0xDC700000,
    0x00000200, 0xBF8C0F70, 0xBF900001, 0xBF8D0001, 0xBE801F70,
};

// Machine code for the trap handler, suffix-9 variant. The block comment at
// the top of the initialiser carries the assembly listing kept alongside the
// words (presumably the source they were assembled from -- keep both in sync
// when regenerating).
static const unsigned int kCodeTrapHandler9[] = {
    /*
      var SQ_WAVE_PC_HI_TRAP_ID_SHIFT           = 16
      var SQ_WAVE_PC_HI_TRAP_ID_SIZE            = 8
      var SQ_WAVE_PC_HI_TRAP_ID_BFE             = (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
      var SQ_WAVE_STATUS_HALT_MASK              = 0x2000
      var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x8000
      var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT     = 15
      var IB_STS_SAVE_RCNT_FIRST_REPLAY_SHIFT   = 26

      // ABI between first and second level trap handler.
      var s_trap_info_lo = ttmp0
      var s_trap_info_hi = ttmp1
      var s_ib_sts_save  = ttmp11 // [31:26] = SQ_WAVE_IB_STS[20:15]
      var s_status_save  = ttmp12

      // SPI debug data is not present/needed in these registers.
      var s_tmp0 = ttmp2
      var s_tmp1 = ttmp3
      var s_tmp2 = ttmp4
      var s_tmp3 = ttmp5

      shader main
        type(CS)

        // If this is not a trap then return to the shader.
        s_bfe_u32            s_tmp0, s_trap_info_hi, SQ_WAVE_PC_HI_TRAP_ID_BFE
        s_cbranch_scc0       L_EXIT_TRAP

        // If llvm.trap then signal queue error.
        s_cmp_eq_u32         s_tmp0, 0x2
        s_cbranch_scc1       L_SIGNAL_QUEUE

        // For other traps advance PC and return to shader.
        s_add_u32            s_trap_info_lo, s_trap_info_lo, 0x4
        s_addc_u32           s_trap_info_hi, s_trap_info_hi, 0x0
        s_branch             L_EXIT_TRAP

      L_SIGNAL_QUEUE:
        // Retrieve queue_inactive_signal from amd_queue_t* passed in s[0:1].
        s_load_dwordx2       [s_tmp0, s_tmp1], s[0:1], 0xC0 glc:1
        s_waitcnt            lgkmcnt(0)

        // Set queue signal value to unhandled exception error.
        s_mov_b32            s_tmp2, 0x80000000
        s_mov_b32            s_tmp3, 0x0
        s_atomic_swap_x2     [s_tmp2, s_tmp3], [s_tmp0, s_tmp1], 0x8 glc:1
        s_waitcnt            lgkmcnt(0)

        // Skip event trigger if the signal value was already non-zero.
        s_or_b32             s_tmp2, s_tmp2, s_tmp3
        s_cbranch_scc1       L_SIGNAL_DONE

        // Check for a non-NULL signal event mailbox.
        s_load_dwordx2       [s_tmp2, s_tmp3], [s_tmp0, s_tmp1], 0x10 glc:1
        s_waitcnt            lgkmcnt(0)
        s_and_b64            [s_tmp2, s_tmp3], [s_tmp2, s_tmp3], [s_tmp2, s_tmp3]
        s_cbranch_scc0       L_SIGNAL_DONE

        // Load the signal event value.
        s_load_dword         s_tmp0, [s_tmp0, s_tmp1], 0x18 glc:1
        s_waitcnt            lgkmcnt(0)

        // Write the signal event value to the mailbox.
        s_store_dword        s_tmp0, [s_tmp2, s_tmp3], 0x0 glc:1
        s_waitcnt            lgkmcnt(0)

        // Send an interrupt to trigger event notification.
        s_sendmsg            sendmsg(MSG_INTERRUPT)

      L_SIGNAL_DONE:
        // Halt the wavefront.
        s_or_b32             s_status_save, s_status_save, SQ_WAVE_STATUS_HALT_MASK

      L_EXIT_TRAP:
        // Restore SQ_WAVE_IB_STS.
        s_lshr_b32           s_tmp0, s_ib_sts_save, (IB_STS_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
        s_and_b32            s_tmp0, s_tmp0, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
        s_setreg_b32         hwreg(HW_REG_IB_STS), s_tmp0

        // Restore SQ_WAVE_STATUS.
        s_and_b64            exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
        s_and_b64            vcc, vcc, vcc    // Restore STATUS.VCCZ, not writable by s_setreg_b32
        s_setreg_b32         hwreg(HW_REG_STATUS), s_status_save

        // Return to shader at unmodified PC.
        s_rfe_b64            [s_trap_info_lo, s_trap_info_hi]
      end
    */
    0x92eeff6d, 0x00080010, 0xbf84001e, 0xbf06826e, 0xbf850003, 0x806c846c,
    0x826d806d, 0xbf820019, 0xc0071b80, 0x000000c0, 0xbf8cc07f, 0xbef000ff,
    0x80000000, 0xbef10080, 0xc2831c37, 0x00000008, 0xbf8cc07f, 0x87707170,
    0xbf85000c, 0xc0071c37, 0x00000010, 0xbf8cc07f, 0x86f07070, 0xbf840007,
    0xc0031bb7, 0x00000018, 0xbf8cc07f, 0xc0431bb8, 0x00000000, 0xbf8cc07f,
    0xbf900001, 0x8778ff78, 0x00002000, 0x8f6e8b77, 0x866eff6e, 0x00008000,
    0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c, 0x00000000,
};

// Machine code for the "copy aligned" shader, suffix-8 variant.
// NOTE(review): purpose inferred from the identifier only.
static const unsigned int kCodeCopyAligned8[] = {
    0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
    0xC00A0400, 0x00000030, 0xC00A0500, 0x00000040, 0xC0020600, 0x00000050,
    0xBF8C007F, 0x8E028602, 0x32000002, 0x7E060205, 0xD1196A02, 0x00000900,
    0xD11C6A03, 0x01A90103, 0x7E0A0207, 0xD1196A04, 0x00000D00, 0xD11C6A05,
    0x01A90105, 0xD0E9006A, 0x00001102, 0xBF86000F, 0x86FE6A7E, 0xDC400000,
    0x01000002, 0xBF8C0F70, 0xD1196A02, 0x00003102, 0xD11C6A03, 0x01A90103,
    0xDC600000, 0x00000104, 0xD1196A04, 0x00003104, 0xD11C6A05, 0x01A90105,
    0xBF82FFEE, 0xBEFE01C1, 0x8E198418, 0x24020084, 0x7E060209, 0xD1196A02,
    0x00001101, 0xD11C6A03, 0x01A90103, 0x7E0A020B, 0xD1196A04, 0x00001501,
    0xD11C6A05, 0x01A90105, 0xD0E9006A, 0x00001902, 0xBF86000E, 0xDC5C0000,
    0x08000002, 0xD1196A02, 0x00003302, 0xD11C6A03, 0x01A90103, 0xBF8C0F70,
    0xDC7C0000, 0x00000804, 0xD1196A04, 0x00003304, 0xD11C6A05, 0x01A90105,
    0xBF82FFEF, 0x8E198218, 0x24020082, 0x7E06020D, 0xD1196A02, 0x00001901,
    0xD11C6A03, 0x01A90103, 0x7E0A020F, 0xD1196A04, 0x00001D01, 0xD11C6A05,
    0x01A90105, 0xD0E9006A, 0x00002102, 0xBF86000F, 0x86FE6A7E, 0xDC500000,
    0x01000002, 0xD1196A02, 0x00003302, 0xD11C6A03, 0x01A90103, 0xBF8C0F70,
    0xDC700000, 0x00000104, 0xD1196A04, 0x00003304, 0xD11C6A05, 0x01A90105,
    0xBF82FFEE, 0xBEFE01C1, 0x7E060211, 0xD1196A02, 0x00002100, 0xD11C6A03,
    0x01A90103, 0x7E0A0213, 0xD1196A04, 0x00002500, 0xD11C6A05, 0x01A90105,
    0xD0E9006A, 0x00002902, 0xBF860006, 0x86FE6A7E, 0xDC400000, 0x01000002,
    0xBF8C0F70, 0xDC600000, 0x00000104, 0xBF810000,
};

// Machine code for the "copy misaligned" shader, suffix-8 variant.
// NOTE(review): purpose inferred from the identifier only.
static const unsigned int kCodeCopyMisaligned8[] = {
    0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
    0xC0020400, 0x00000030, 0xBF8C007F, 0x8E028602, 0x32000002, 0x7E060205,
    0xD1196A02, 0x00000900, 0xD11C6A03, 0x01A90103, 0x7E0A0207, 0xD1196A04,
    0x00000D00, 0xD11C6A05, 0x01A90105, 0xD0E9006A, 0x00001102, 0xBF860032,
    0xDC400000, 0x06000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
    0xDC400000, 0x07000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
    0xDC400000, 0x08000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
    0xDC400000, 0x09000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
    0xBF8C0F70, 0xDC600000, 0x00000604, 0xD1196A04, 0x00002104, 0xD11C6A05,
    0x01A90105, 0xDC600000, 0x00000704, 0xD1196A04, 0x00002104, 0xD11C6A05,
    0x01A90105, 0xDC600000, 0x00000804, 0xD1196A04, 0x00002104, 0xD11C6A05,
    0x01A90105, 0xDC600000, 0x00000904, 0xD1196A04, 0x00002104, 0xD11C6A05,
    0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD1196A02, 0x00001100, 0xD11C6A03,
    0x01A90103, 0x7E0A020B, 0xD1196A04, 0x00001500, 0xD11C6A05, 0x01A90105,
    0xD0E9006A, 0x00001902, 0xBF86000F, 0x86FE6A7E, 0xDC400000, 0x01000002,
    0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103, 0xBF8C0F70, 0xDC600000,
    0x00000104, 0xD1196A04, 0x00002104, 0xD11C6A05, 0x01A90105, 0xBF82FFEE,
    0xBF810000,
};

// Machine code for the "fill" shader, suffix-8 variant.
// NOTE(review): purpose inferred from the identifier only.
static const unsigned int kCodeFill8[] = {
    0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xBF8C007F, 0x8E028602,
    0x32000002, 0x7E08020A, 0x7E0A020A, 0x7E0C020A, 0x7E0E020A, 0x8E0C840B,
    0x24020084, 0x7E060205, 0xD1196A02, 0x00000901, 0xD11C6A03, 0x01A90103,
    0xD0E9006A, 0x00000D02, 0xBF860007, 0xDC7C0000, 0x00000402, 0xD1196A02,
    0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF6, 0x8E0C820B, 0x24020082,
    0x7E060207, 0xD1196A02, 0x00000D01, 0xD11C6A03, 0x01A90103, 0xD0E9006A,
    0x00001102, 0xBF860008, 0x86FE6A7E, 0xDC700000, 0x00000402, 0xD1196A02,
    0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF5, 0xBF810000,
};

}  // namespace amd

#endif  // HSA_RUNTIME_CORE_INC_AMD_GPU_SHADERS_H_