1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // The University of Illinois/NCSA
4 // Open Source License (NCSA)
5 //
6 // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
7 //
8 // Developed by:
9 //
10 //                 AMD Research and AMD HSA Software Development
11 //
12 //                 Advanced Micro Devices, Inc.
13 //
14 //                 www.amd.com
15 //
16 // Permission is hereby granted, free of charge, to any person obtaining a copy
17 // of this software and associated documentation files (the "Software"), to
18 // deal with the Software without restriction, including without limitation
19 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20 // and/or sell copies of the Software, and to permit persons to whom the
21 // Software is furnished to do so, subject to the following conditions:
22 //
23 //  - Redistributions of source code must retain the above copyright notice,
24 //    this list of conditions and the following disclaimers.
25 //  - Redistributions in binary form must reproduce the above copyright
26 //    notice, this list of conditions and the following disclaimers in
27 //    the documentation and/or other materials provided with the distribution.
28 //  - Neither the names of Advanced Micro Devices, Inc,
29 //    nor the names of its contributors may be used to endorse or promote
30 //    products derived from this Software without specific prior written
31 //    permission.
32 //
33 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36 // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39 // DEALINGS WITH THE SOFTWARE.
40 //
41 ////////////////////////////////////////////////////////////////////////////////
42 
43 #ifndef HSA_RUNTIME_CORE_INC_AMD_GPU_SHADERS_H_
44 #define HSA_RUNTIME_CORE_INC_AMD_GPU_SHADERS_H_
45 
46 namespace amd {
47 
48 static const unsigned int kCodeCopyAligned7[] = {
49     0xC0820100, 0xC0840104, 0xC0860108, 0xC088010C, 0xC08A0110, 0xC00C0114,
50     0xBF8C007F, 0x8F028602, 0x4A000002, 0x7E060205, 0xD24A6A02, 0x00000900,
51     0xD2506A03, 0x01A90103, 0x7E0A0207, 0xD24A6A04, 0x00000D00, 0xD2506A05,
52     0x01A90105, 0xD1C2006A, 0x00001102, 0xBF86000F, 0x87FE6A7E, 0xDC200000,
53     0x01000002, 0xBF8C0F70, 0xD24A6A02, 0x00003102, 0xD2506A03, 0x01A90103,
54     0xDC600000, 0x00000104, 0xD24A6A04, 0x00003104, 0xD2506A05, 0x01A90105,
55     0xBF82FFEE, 0xBEFE04C1, 0x8F198418, 0x34020084, 0x7E060209, 0xD24A6A02,
56     0x00001101, 0xD2506A03, 0x01A90103, 0x7E0A020B, 0xD24A6A04, 0x00001501,
57     0xD2506A05, 0x01A90105, 0xD1C2006A, 0x00001902, 0xBF86000E, 0xDC380000,
58     0x08000002, 0xD24A6A02, 0x00003302, 0xD2506A03, 0x01A90103, 0xBF8C0F70,
59     0xDC780000, 0x00000804, 0xD24A6A04, 0x00003304, 0xD2506A05, 0x01A90105,
60     0xBF82FFEF, 0x8F198218, 0x34020082, 0x7E06020D, 0xD24A6A02, 0x00001901,
61     0xD2506A03, 0x01A90103, 0x7E0A020F, 0xD24A6A04, 0x00001D01, 0xD2506A05,
62     0x01A90105, 0xD1C2006A, 0x00002102, 0xBF86000F, 0x87FE6A7E, 0xDC300000,
63     0x01000002, 0xD24A6A02, 0x00003302, 0xD2506A03, 0x01A90103, 0xBF8C0F70,
64     0xDC700000, 0x00000104, 0xD24A6A04, 0x00003304, 0xD2506A05, 0x01A90105,
65     0xBF82FFEE, 0xBEFE04C1, 0x7E060211, 0xD24A6A02, 0x00002100, 0xD2506A03,
66     0x01A90103, 0x7E0A0213, 0xD24A6A04, 0x00002500, 0xD2506A05, 0x01A90105,
67     0xD1C2006A, 0x00002902, 0xBF860006, 0x87FE6A7E, 0xDC200000, 0x01000002,
68     0xBF8C0F70, 0xDC600000, 0x00000104, 0xBF810000,
69 };
70 
71 static const unsigned int kCodeCopyMisaligned7[] = {
72     0xC0820100, 0xC0840104, 0xC0860108, 0xC008010C, 0xBF8C007F, 0x8F028602,
73     0x4A000002, 0x7E060205, 0xD24A6A02, 0x00000900, 0xD2506A03, 0x01A90103,
74     0x7E0A0207, 0xD24A6A04, 0x00000D00, 0xD2506A05, 0x01A90105, 0xD1C2006A,
75     0x00001102, 0xBF860032, 0xDC200000, 0x06000002, 0xD24A6A02, 0x00002102,
76     0xD2506A03, 0x01A90103, 0xDC200000, 0x07000002, 0xD24A6A02, 0x00002102,
77     0xD2506A03, 0x01A90103, 0xDC200000, 0x08000002, 0xD24A6A02, 0x00002102,
78     0xD2506A03, 0x01A90103, 0xDC200000, 0x09000002, 0xD24A6A02, 0x00002102,
79     0xD2506A03, 0x01A90103, 0xBF8C0F70, 0xDC600000, 0x00000604, 0xD24A6A04,
80     0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000704, 0xD24A6A04,
81     0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000804, 0xD24A6A04,
82     0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000904, 0xD24A6A04,
83     0x00002104, 0xD2506A05, 0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD24A6A02,
84     0x00001100, 0xD2506A03, 0x01A90103, 0x7E0A020B, 0xD24A6A04, 0x00001500,
85     0xD2506A05, 0x01A90105, 0xD1C2006A, 0x00001902, 0xBF86000F, 0x87FE6A7E,
86     0xDC200000, 0x01000002, 0xD24A6A02, 0x00002102, 0xD2506A03, 0x01A90103,
87     0xBF8C0F70, 0xDC600000, 0x00000104, 0xD24A6A04, 0x00002104, 0xD2506A05,
88     0x01A90105, 0xBF82FFEE, 0xBF810000,
89 };
90 
91 static const unsigned int kCodeFill7[] = {
92     0xC0820100, 0xC0840104, 0xBF8C007F, 0x8F028602, 0x4A000002, 0x7E08020A,
93     0x7E0A020A, 0x7E0C020A, 0x7E0E020A, 0x8F0C840B, 0x34020084, 0x7E060205,
94     0xD24A6A02, 0x00000901, 0xD2506A03, 0x01A90103, 0xD1C2006A, 0x00000D02,
95     0xBF860007, 0xDC780000, 0x00000402, 0xD24A6A02, 0x00001902, 0xD2506A03,
96     0x01A90103, 0xBF82FFF6, 0x8F0C820B, 0x34020082, 0x7E060207, 0xD24A6A02,
97     0x00000D01, 0xD2506A03, 0x01A90103, 0xD1C2006A, 0x00001102, 0xBF860008,
98     0x87FE6A7E, 0xDC700000, 0x00000402, 0xD24A6A02, 0x00001902, 0xD2506A03,
99     0x01A90103, 0xBF82FFF5, 0xBF810000,
100 };
101 
102 static const unsigned int kCodeTrapHandler8[] = {
103     0xC0061C80, 0x000000C0, 0xBF8C007F, 0xBEFE0181, 0x80728872, 0x82738073,
104     0x7E000272, 0x7E020273, 0x7E0402FF, 0x80000000, 0x7E060280, 0xDD800000,
105     0x00000200, 0xBF8C0F70, 0x7DD40500, 0xBF870011, 0xC0061D39, 0x00000008,
106     0xBF8C007F, 0x86F47474, 0xBF84000C, 0x80729072, 0x82738073, 0xC0021CB9,
107     0x00000000, 0xBF8C007F, 0x7E000274, 0x7E020275, 0x7E040272, 0xDC700000,
108     0x00000200, 0xBF8C0F70, 0xBF900001, 0xBF8D0001, 0xBE801F70,
109 };
110 
111 static const unsigned int kCodeTrapHandler9[] = {
112 /*
113   var SQ_WAVE_PC_HI_TRAP_ID_SHIFT           = 16
114   var SQ_WAVE_PC_HI_TRAP_ID_SIZE            = 8
115   var SQ_WAVE_PC_HI_TRAP_ID_BFE             = (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
116   var SQ_WAVE_STATUS_HALT_MASK              = 0x2000
117   var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x8000
118   var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT     = 15
119   var IB_STS_SAVE_RCNT_FIRST_REPLAY_SHIFT   = 26
120 
121   // ABI between first and second level trap handler.
122   var s_trap_info_lo = ttmp0
123   var s_trap_info_hi = ttmp1
124   var s_ib_sts_save  = ttmp11 // [31:26] = SQ_WAVE_IB_STS[20:15]
125   var s_status_save  = ttmp12
126 
127   // SPI debug data is not present/needed in these registers.
128   var s_tmp0         = ttmp2
129   var s_tmp1         = ttmp3
130   var s_tmp2         = ttmp4
131   var s_tmp3         = ttmp5
132 
133   shader main
134     type(CS)
135 
136     // If this is not a trap then return to the shader.
137     s_bfe_u32            s_tmp0, s_trap_info_hi, SQ_WAVE_PC_HI_TRAP_ID_BFE
138     s_cbranch_scc0       L_EXIT_TRAP
139 
140     // If llvm.trap then signal queue error.
141     s_cmp_eq_u32         s_tmp0, 0x2
142     s_cbranch_scc1       L_SIGNAL_QUEUE
143 
144     // For other traps advance PC and return to shader.
145     s_add_u32            s_trap_info_lo, s_trap_info_lo, 0x4
146     s_addc_u32           s_trap_info_hi, s_trap_info_hi, 0x0
147     s_branch             L_EXIT_TRAP
148 
149   L_SIGNAL_QUEUE:
150     // Retrieve queue_inactive_signal from amd_queue_t* passed in s[0:1].
151     s_load_dwordx2       [s_tmp0, s_tmp1], s[0:1], 0xC0 glc:1
152     s_waitcnt            lgkmcnt(0)
153 
154     // Set queue signal value to unhandled exception error.
155     s_mov_b32            s_tmp2, 0x80000000
156     s_mov_b32            s_tmp3, 0x0
157     s_atomic_swap_x2     [s_tmp2, s_tmp3], [s_tmp0, s_tmp1], 0x8 glc:1
158     s_waitcnt            lgkmcnt(0)
159 
160     // Skip event trigger if the signal value was already non-zero.
161     s_or_b32             s_tmp2, s_tmp2, s_tmp3
162     s_cbranch_scc1       L_SIGNAL_DONE
163 
164     // Check for a non-NULL signal event mailbox.
165     s_load_dwordx2       [s_tmp2, s_tmp3], [s_tmp0, s_tmp1], 0x10 glc:1
166     s_waitcnt            lgkmcnt(0)
167     s_and_b64            [s_tmp2, s_tmp3], [s_tmp2, s_tmp3], [s_tmp2, s_tmp3]
168     s_cbranch_scc0       L_SIGNAL_DONE
169 
170     // Load the signal event value.
171     s_load_dword         s_tmp0, [s_tmp0, s_tmp1], 0x18 glc:1
172     s_waitcnt            lgkmcnt(0)
173 
174     // Write the signal event value to the mailbox.
175     s_store_dword        s_tmp0, [s_tmp2, s_tmp3], 0x0 glc:1
176     s_waitcnt            lgkmcnt(0)
177 
178     // Send an interrupt to trigger event notification.
179     s_sendmsg            sendmsg(MSG_INTERRUPT)
180 
181   L_SIGNAL_DONE:
182     // Halt the wavefront.
183     s_or_b32             s_status_save, s_status_save, SQ_WAVE_STATUS_HALT_MASK
184 
185   L_EXIT_TRAP:
186     // Restore SQ_WAVE_IB_STS.
187     s_lshr_b32           s_tmp0, s_ib_sts_save, (IB_STS_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
188     s_and_b32            s_tmp0, s_tmp0, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
189     s_setreg_b32         hwreg(HW_REG_IB_STS), s_tmp0
190 
191     // Restore SQ_WAVE_STATUS.
192     s_and_b64            exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
193     s_and_b64            vcc, vcc, vcc    // Restore STATUS.VCCZ, not writable by s_setreg_b32
194     s_setreg_b32         hwreg(HW_REG_STATUS), s_status_save
195 
196     // Return to shader at unmodified PC.
197     s_rfe_b64            [s_trap_info_lo, s_trap_info_hi]
198   end
199 */
200     0x92eeff6d, 0x00080010, 0xbf84001e, 0xbf06826e, 0xbf850003, 0x806c846c,
201     0x826d806d, 0xbf820019, 0xc0071b80, 0x000000c0, 0xbf8cc07f, 0xbef000ff,
202     0x80000000, 0xbef10080, 0xc2831c37, 0x00000008, 0xbf8cc07f, 0x87707170,
203     0xbf85000c, 0xc0071c37, 0x00000010, 0xbf8cc07f, 0x86f07070, 0xbf840007,
204     0xc0031bb7, 0x00000018, 0xbf8cc07f, 0xc0431bb8, 0x00000000, 0xbf8cc07f,
205     0xbf900001, 0x8778ff78, 0x00002000, 0x8f6e8b77, 0x866eff6e, 0x00008000,
206     0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c, 0x00000000,
207 };
208 
209 static const unsigned int kCodeCopyAligned8[] = {
210     0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
211     0xC00A0400, 0x00000030, 0xC00A0500, 0x00000040, 0xC0020600, 0x00000050,
212     0xBF8C007F, 0x8E028602, 0x32000002, 0x7E060205, 0xD1196A02, 0x00000900,
213     0xD11C6A03, 0x01A90103, 0x7E0A0207, 0xD1196A04, 0x00000D00, 0xD11C6A05,
214     0x01A90105, 0xD0E9006A, 0x00001102, 0xBF86000F, 0x86FE6A7E, 0xDC400000,
215     0x01000002, 0xBF8C0F70, 0xD1196A02, 0x00003102, 0xD11C6A03, 0x01A90103,
216     0xDC600000, 0x00000104, 0xD1196A04, 0x00003104, 0xD11C6A05, 0x01A90105,
217     0xBF82FFEE, 0xBEFE01C1, 0x8E198418, 0x24020084, 0x7E060209, 0xD1196A02,
218     0x00001101, 0xD11C6A03, 0x01A90103, 0x7E0A020B, 0xD1196A04, 0x00001501,
219     0xD11C6A05, 0x01A90105, 0xD0E9006A, 0x00001902, 0xBF86000E, 0xDC5C0000,
220     0x08000002, 0xD1196A02, 0x00003302, 0xD11C6A03, 0x01A90103, 0xBF8C0F70,
221     0xDC7C0000, 0x00000804, 0xD1196A04, 0x00003304, 0xD11C6A05, 0x01A90105,
222     0xBF82FFEF, 0x8E198218, 0x24020082, 0x7E06020D, 0xD1196A02, 0x00001901,
223     0xD11C6A03, 0x01A90103, 0x7E0A020F, 0xD1196A04, 0x00001D01, 0xD11C6A05,
224     0x01A90105, 0xD0E9006A, 0x00002102, 0xBF86000F, 0x86FE6A7E, 0xDC500000,
225     0x01000002, 0xD1196A02, 0x00003302, 0xD11C6A03, 0x01A90103, 0xBF8C0F70,
226     0xDC700000, 0x00000104, 0xD1196A04, 0x00003304, 0xD11C6A05, 0x01A90105,
227     0xBF82FFEE, 0xBEFE01C1, 0x7E060211, 0xD1196A02, 0x00002100, 0xD11C6A03,
228     0x01A90103, 0x7E0A0213, 0xD1196A04, 0x00002500, 0xD11C6A05, 0x01A90105,
229     0xD0E9006A, 0x00002902, 0xBF860006, 0x86FE6A7E, 0xDC400000, 0x01000002,
230     0xBF8C0F70, 0xDC600000, 0x00000104, 0xBF810000,
231 };
232 
233 static const unsigned int kCodeCopyMisaligned8[] = {
234     0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
235     0xC0020400, 0x00000030, 0xBF8C007F, 0x8E028602, 0x32000002, 0x7E060205,
236     0xD1196A02, 0x00000900, 0xD11C6A03, 0x01A90103, 0x7E0A0207, 0xD1196A04,
237     0x00000D00, 0xD11C6A05, 0x01A90105, 0xD0E9006A, 0x00001102, 0xBF860032,
238     0xDC400000, 0x06000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
239     0xDC400000, 0x07000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
240     0xDC400000, 0x08000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
241     0xDC400000, 0x09000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
242     0xBF8C0F70, 0xDC600000, 0x00000604, 0xD1196A04, 0x00002104, 0xD11C6A05,
243     0x01A90105, 0xDC600000, 0x00000704, 0xD1196A04, 0x00002104, 0xD11C6A05,
244     0x01A90105, 0xDC600000, 0x00000804, 0xD1196A04, 0x00002104, 0xD11C6A05,
245     0x01A90105, 0xDC600000, 0x00000904, 0xD1196A04, 0x00002104, 0xD11C6A05,
246     0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD1196A02, 0x00001100, 0xD11C6A03,
247     0x01A90103, 0x7E0A020B, 0xD1196A04, 0x00001500, 0xD11C6A05, 0x01A90105,
248     0xD0E9006A, 0x00001902, 0xBF86000F, 0x86FE6A7E, 0xDC400000, 0x01000002,
249     0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103, 0xBF8C0F70, 0xDC600000,
250     0x00000104, 0xD1196A04, 0x00002104, 0xD11C6A05, 0x01A90105, 0xBF82FFEE,
251     0xBF810000,
252 };
253 
254 static const unsigned int kCodeFill8[] = {
255     0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xBF8C007F, 0x8E028602,
256     0x32000002, 0x7E08020A, 0x7E0A020A, 0x7E0C020A, 0x7E0E020A, 0x8E0C840B,
257     0x24020084, 0x7E060205, 0xD1196A02, 0x00000901, 0xD11C6A03, 0x01A90103,
258     0xD0E9006A, 0x00000D02, 0xBF860007, 0xDC7C0000, 0x00000402, 0xD1196A02,
259     0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF6, 0x8E0C820B, 0x24020082,
260     0x7E060207, 0xD1196A02, 0x00000D01, 0xD11C6A03, 0x01A90103, 0xD0E9006A,
261     0x00001102, 0xBF860008, 0x86FE6A7E, 0xDC700000, 0x00000402, 0xD1196A02,
262     0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF5, 0xBF810000,
263 };
264 
265 }  // namespace amd
266 
267 #endif  // header guard
268