/////////////////////////////////////////////////////////////////////////
// $Id: icache.cc 14113 2021-01-31 14:03:28Z sshwarts $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2007-2015 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////

#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR

#include "gui/siminterface.h"
#include "param_names.h"
#include "cpustats.h"

#include "decoder/ia_opcodes.h"

bxPageWriteStampTable pageWriteStampTable;

extern int fetchDecode32(const Bit8u *fetchPtr, bool is_32, bxInstruction_c *i, unsigned remainingInPage);
#if BX_SUPPORT_X86_64
extern int fetchDecode64(const Bit8u *fetchPtr, bxInstruction_c *i, unsigned remainingInPage);
#endif
extern int assignHandler(bxInstruction_c *i, Bit32u fetchModeMask);

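// Flush the trace cache of every simulated CPU, force each CPU to abandon
// the trace it is currently executing, and clear all page write stamps so
// stale traces cannot be reused.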
void flushICaches(void)
{
  for (unsigned i=0; i<BX_SMP_PROCESSORS; i++) {
    BX_CPU(i)->iCache.flushICacheEntries();
    BX_CPU(i)->async_event |= BX_ASYNC_EVENT_STOP_TRACE;
  }

  pageWriteStampTable.resetWriteStamps();
}

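// Self-modifying code was detected on physical page 'pAddr'; 'mask' marks the
// regions of the page that were written. Notify every CPU so it stops its
// current trace and invalidates cached traces overlapping those regions.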
void handleSMC(bx_phy_address pAddr, Bit32u mask)
{
  INC_SMC_STAT(smc);

  for (unsigned i=0; i<BX_SMP_PROCESSORS; i++) {
    BX_CPU(i)->async_event |= BX_ASYNC_EVENT_STOP_TRACE;
    BX_CPU(i)->iCache.handleSMC(pAddr, mask);
  }
}

#if BX_SUPPORT_HANDLERS_CHAINING_SPEEDUPS

void BX_CPU_C::BxEndTrace(bxInstruction_c *i)
{
  // do nothing, return to main cpu_loop
}

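// Append a dummy internal opcode to the trace. Its handler, BxEndTrace,
// simply returns control to the main cpu_loop, terminating chained handler
// execution at the end of the trace.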
void genDummyICacheEntry(bxInstruction_c *i)
{
  i->setILen(0);
  i->setIaOpcode(BX_INSERTED_OPCODE);
  i->execute1 = &BX_CPU_C::BxEndTrace;
}

#endif

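// Called on a trace cache miss: allocate a new trace entry for physical
// address 'pAddr' and fill it by decoding instructions from the current
// fetch page, starting at offset 'eipBiased' inside the fetch window.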
bxICacheEntry_c* BX_CPU_C::serveICacheMiss(Bit32u eipBiased, bx_phy_address pAddr)
{
  bxICacheEntry_c *entry = BX_CPU_THIS_PTR iCache.get_entry(pAddr, BX_CPU_THIS_PTR fetchModeMask);

  BX_CPU_THIS_PTR iCache.alloc_trace(entry);

  // Cache miss. We weren't so lucky, but let's be optimistic: try to build
  // a trace from the incoming instruction byte stream.
  entry->pAddr = pAddr;
  entry->traceMask = 0;

  unsigned remainingInPage = BX_CPU_THIS_PTR eipPageWindowSize - eipBiased;
  const Bit8u *fetchPtr = BX_CPU_THIS_PTR eipFetchPtr + eipBiased;
  int ret;

  bxInstruction_c *i = entry->i;

  Bit32u pageOffset = PAGE_OFFSET((Bit32u) pAddr);
  Bit32u traceMask = 0;

#if BX_SUPPORT_SMP == 0
  if (PPFOf(pAddr) == BX_CPU_THIS_PTR pAddrStackPage)
    invalidate_stack_cache();
#endif

  // Don't allow traces longer than cpu_loop can execute
  static unsigned quantum =
#if BX_SUPPORT_SMP
    (BX_SMP_PROCESSORS > 1) ? SIM->get_param_num(BXPN_SMP_QUANTUM)->get() :
#endif
    BX_MAX_TRACE_LENGTH;

  for (unsigned n=0;n < quantum;n++)
  {
#if BX_SUPPORT_X86_64
    if (BX_CPU_THIS_PTR cpu_mode == BX_MODE_LONG_64)
      ret = fetchDecode64(fetchPtr, i, remainingInPage);
    else
#endif
      ret = fetchDecode32(fetchPtr, BX_CPU_THIS_PTR sregs[BX_SEG_REG_CS].cache.u.segment.d_b, i, remainingInPage);

    if (ret < 0) {
      // Fetching instruction on segment/page boundary
      if (n > 0) {
        // The trace is already valid, it has several instructions inside,
        // in this case just drop the boundary instruction and stop
        // tracing.
        break;
      }
      // First instruction is boundary fetch, leave the trace cache entry
      // invalid for now because boundaryFetch() can fault
      entry->pAddr = ~entry->pAddr;
      entry->tlen = 1;
      boundaryFetch(fetchPtr, remainingInPage, i);

      // Add the instruction to trace cache
      entry->pAddr = ~entry->pAddr;
      entry->traceMask = 0x80000000; /* last line in page */
      pageWriteStampTable.markICacheMask(entry->pAddr, entry->traceMask);
      pageWriteStampTable.markICacheMask(BX_CPU_THIS_PTR pAddrFetchPage, 0x1);

#if BX_SUPPORT_HANDLERS_CHAINING_SPEEDUPS
      entry->tlen++; /* Add the inserted end of trace opcode */
      genDummyICacheEntry(++i);
#endif

      BX_CPU_THIS_PTR iCache.commit_page_split_trace(BX_CPU_THIS_PTR pAddrFetchPage, entry);
      return entry;
    }

    ret = assignHandler(i, BX_CPU_THIS_PTR fetchModeMask);

    // add instruction to the trace
    unsigned iLen = i->ilen();
    entry->tlen++;

#ifdef BX_INSTR_STORE_OPCODE_BYTES
    i->set_opcode_bytes(fetchPtr);
#endif
    BX_INSTR_OPCODE(BX_CPU_ID, i, fetchPtr, iLen,
        BX_CPU_THIS_PTR sregs[BX_SEG_REG_CS].cache.u.segment.d_b, long64_mode());

    i++;

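    // Mark every 128-byte region of the page touched by this instruction in
    // the trace mask: bit k covers page offsets [k*128, k*128+127]. For
    // example, a 6-byte instruction at page offset 0x7C spans offsets
    // 0x7C..0x81 and therefore sets bits 0 and 1.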
    traceMask |= 1 << (pageOffset >> 7);
    traceMask |= 1 << ((pageOffset + iLen - 1) >> 7);

    // continue to the next instruction
    remainingInPage -= iLen;
    if (ret != 0 /* stop trace indication */ || remainingInPage == 0) break;
    pAddr += iLen;
    pageOffset += iLen;
    fetchPtr += iLen;

    // try to find a trace starting from current pAddr and merge
    if (remainingInPage >= 15) { // avoid merging with page split trace
      if (mergeTraces(entry, i, pAddr)) {
        entry->traceMask |= traceMask;
        pageWriteStampTable.markICacheMask(pAddr, entry->traceMask);
        BX_CPU_THIS_PTR iCache.commit_trace(entry->tlen);
        return entry;
      }
    }
  }

  //BX_INFO(("commit trace %08x len=%d mask %08x", (Bit32u) entry->pAddr, entry->tlen, pageWriteStampTable.getFineGranularityMapping(entry->pAddr)));

  entry->traceMask |= traceMask;

  pageWriteStampTable.markICacheMask(pAddr, entry->traceMask);

#if BX_SUPPORT_HANDLERS_CHAINING_SPEEDUPS
  entry->tlen++; /* Add the inserted end of trace opcode */
  genDummyICacheEntry(i);
#endif

  BX_CPU_THIS_PTR iCache.commit_trace(entry->tlen);

  return entry;
}

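// If a committed trace already starts at physical address 'pAddr', copy (part
// of) its decoded instructions into the trace currently being built at 'i'.
// Returns true when a merge happened, so the caller can commit immediately.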
bool BX_CPU_C::mergeTraces(bxICacheEntry_c *entry, bxInstruction_c *i, bx_phy_address pAddr)
{
  bxICacheEntry_c *e = BX_CPU_THIS_PTR iCache.find_entry(pAddr, BX_CPU_THIS_PTR fetchModeMask);

  if (e != NULL)
  {
    // determine the maximum number of instructions to take from the other entry
    unsigned max_length = e->tlen;

#if BX_SUPPORT_HANDLERS_CHAINING_SPEEDUPS
    if (max_length + entry->tlen > BX_MAX_TRACE_LENGTH)
      return 0;
#else
    if (max_length + entry->tlen > BX_MAX_TRACE_LENGTH)
      max_length = BX_MAX_TRACE_LENGTH - entry->tlen;
    if(max_length == 0) return 0;
#endif

    memcpy(i, e->i, sizeof(bxInstruction_c)*max_length);
    entry->tlen += max_length;
    BX_ASSERT(entry->tlen <= BX_MAX_TRACE_LENGTH);

    entry->traceMask |= e->traceMask;

    return 1;
  }

  return 0;
}

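// Decode an instruction that crosses a page boundary: copy the bytes left in
// the current page plus the first bytes of the next page into a local buffer
// and decode from there. May raise #GP(0) if no valid instruction fits.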
void BX_CPU_C::boundaryFetch(const Bit8u *fetchPtr, unsigned remainingInPage, bxInstruction_c *i)
{
  unsigned j, k;
  Bit8u fetchBuffer[32];
  int ret;

  if (remainingInPage >= 15) {
    BX_ERROR(("boundaryFetch #GP(0): too many instruction prefixes"));
    exception(BX_GP_EXCEPTION, 0);
  }

  // Read all leftover bytes in current page up to boundary.
  for (j=0; j<remainingInPage; j++) {
    fetchBuffer[j] = *fetchPtr++;
  }

  // The 2nd chunk of the instruction is on the next page.
  // Set RIP to the 0th byte of the 2nd page, and force a
  // prefetch so direct access of that physical page is possible, and
  // all the associated info is updated.
  RIP += remainingInPage;
  prefetch();

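  // Limit the bytes taken from the next page to what the new fetch window
  // actually maps, never more than 15 (the maximum x86 instruction length).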
  unsigned fetchBufferLimit = 15;
  if (BX_CPU_THIS_PTR eipPageWindowSize < 15) {
    BX_DEBUG(("boundaryFetch: small window size after prefetch=%d bytes, remainingInPage=%d bytes", BX_CPU_THIS_PTR eipPageWindowSize, remainingInPage));
    fetchBufferLimit = BX_CPU_THIS_PTR eipPageWindowSize;
  }

  // We can fetch straight from the 0th byte, which is eipFetchPtr.
  fetchPtr = BX_CPU_THIS_PTR eipFetchPtr;

  // read leftover bytes in next page
  for (k=0; k<fetchBufferLimit; k++, j++) {
    fetchBuffer[j] = *fetchPtr++;
  }

#if BX_SUPPORT_X86_64
  if (BX_CPU_THIS_PTR cpu_mode == BX_MODE_LONG_64)
    ret = fetchDecode64(fetchBuffer, i, remainingInPage+fetchBufferLimit);
  else
#endif
    ret = fetchDecode32(fetchBuffer, BX_CPU_THIS_PTR sregs[BX_SEG_REG_CS].cache.u.segment.d_b, i, remainingInPage+fetchBufferLimit);

  if (ret < 0) {
    BX_INFO(("boundaryFetch #GP(0): failed to complete instruction decoding"));
    exception(BX_GP_EXCEPTION, 0);
  }

  ret = assignHandler(i, BX_CPU_THIS_PTR fetchModeMask);

  // Restore RIP since we fudged it to start at the 2nd page boundary.
  RIP = BX_CPU_THIS_PTR prev_rip;

  // Since we cross an instruction boundary, note that we need a prefetch()
  // again on the next instruction. Perhaps we can optimize this to
  // eliminate the extra prefetch() since we do it above, but have to
  // think about repeated instructions, etc.
  // invalidate_prefetch_q();

#ifdef BX_INSTR_STORE_OPCODE_BYTES
  i->set_opcode_bytes(fetchBuffer);
#endif

  BX_INSTR_OPCODE(BX_CPU_ID, i, fetchBuffer, i->ilen(),
      BX_CPU_THIS_PTR sregs[BX_SEG_REG_CS].cache.u.segment.d_b, long64_mode());
}