1 #ifndef	CPU_H
2 #define	CPU_H
3 
4 /*
5  *  Copyright (C) 2005-2019  Anders Gavare.  All rights reserved.
6  *
7  *  Redistribution and use in source and binary forms, with or without
8  *  modification, are permitted provided that the following conditions are met:
9  *
10  *  1. Redistributions of source code must retain the above copyright
11  *     notice, this list of conditions and the following disclaimer.
12  *  2. Redistributions in binary form must reproduce the above copyright
13  *     notice, this list of conditions and the following disclaimer in the
14  *     documentation and/or other materials provided with the distribution.
15  *  3. The name of the author may not be used to endorse or promote products
16  *     derived from this software without specific prior written permission.
17  *
18  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  *  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  *  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  *  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  *  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  *  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  *  SUCH DAMAGE.
29  *
30  *
31  *  CPU-related definitions.
32  */
33 
34 
35 #include <sys/types.h>
36 #include <inttypes.h>
37 #include <sys/time.h>
38 
39 /*  This is needed for undefining 'mips', 'ppc' etc. on weird systems:  */
40 #include "../../config.h"
41 
42 #include "timer.h"
43 
44 
/*
 *  Dyntrans misc declarations, used throughout the dyntrans code.
 *
 *  DYNTRANS_MISC_DECLARATIONS(arch,ARCH,addrtype) declares three structs:
 *  arch_instr_call, arch_tc_physpage and arch_vpg_tlb_entry. ARCH_N_IC_ARGS
 *  and ARCH_IC_ENTRIES_PER_PAGE must be defined where the macro is expanded.
 *  addrtype is the type used for the physaddr, vaddr_page and paddr_page
 *  fields.
 *
 *  Note that there is space for all instruction calls within a page, and then
 *  two more. The first one of these "extra" instruction slots is the end-of-
 *  page slot. It transfers control to the first instruction slot on the next
 *  (virtual) page.
 *
 *  The second of these extra instruction slots is an additional end-of-page
 *  slot for delay-slot architectures. On e.g. MIPS, a branch instruction can
 *  "nullify" (skip) the delay-slot. If the end-of-page slot is skipped, then
 *  we end up one step after that. That's where the end_of_page2 slot is. :)
 *
 *  next_ofs points to the next page in a chain of possible pages. (Several
 *  pages can be in the same chain, but only one matches the specific physaddr.)
 *
 *  translations_bitmap is a tiny bitmap indicating which parts of the page have
 *  actual translations. Bit 0 corresponds to the lowest 1/32nd of the page, bit
 *  1 to the second-lowest 1/32nd, and so on. This speeds up page invalidations,
 *  since only part of the page needs to be reset.
 *
 *  translation_ranges_ofs is an offset within the translation cache to a short
 *  list of ranges for this physpage which contain code. The list is of fixed
 *  length; to extend the list, the list should be made to point to another
 *  list, and so forth. (Bad, O(n) find/insert complexity. Should be fixed some
 *  day. TODO)  See definition of physpage_ranges below.
 *
 *  The ics array must remain the first member of arch_tc_physpage; see the
 *  remark about cur_physpage and cur_ic_page at DYNTRANS_ITC below.
 */
#define DYNTRANS_MISC_DECLARATIONS(arch,ARCH,addrtype)  struct \
	arch ## _instr_call {					\
		void	(*f)(struct cpu *, struct arch ## _instr_call *); \
		size_t	arg[ARCH ## _N_IC_ARGS];			\
	};								\
									\
	/*  Translation cache struct for each physical page:  */	\
	struct arch ## _tc_physpage {					\
		/*  One slot per instruction, plus the two end-of-page	\
		    slots described above:  */				\
		struct arch ## _instr_call ics[ARCH ## _IC_ENTRIES_PER_PAGE+2];\
		uint32_t	next_ofs;	/*  (0 for end of chain)  */ \
		uint32_t	translations_bitmap;			\
		uint32_t	translation_ranges_ofs;			\
		addrtype	physaddr;				\
	};								\
									\
	/*  Virtual page TLB entry (the element type of the		\
	    vph_tlb_entry array declared by VPH_TLBS):  */		\
	struct arch ## _vpg_tlb_entry {					\
		uint8_t		valid;					\
		uint8_t		writeflag;				\
		addrtype	vaddr_page;				\
		addrtype	paddr_page;				\
		unsigned char	*host_page;				\
	};
94 
/*
 *  DYNTRANS_MISC64_DECLARATIONS(arch,ARCH,tlbindextype) declares the two
 *  table structs arch_l3_64_table and arch_l2_64_table, which the VPH64
 *  macro further down declares pointers to.
 *
 *  An l3 table contains five parallel arrays of (1 << ARCH_L3N) entries each.
 *  The array names are the same as in the VPH32 macro further down, but here
 *  phys_addr is uint64_t and the element type of vaddr_to_tlbindex is chosen
 *  by the tlbindextype argument. An l2 table contains (1 << ARCH_L2N)
 *  pointers to l3 tables.
 *
 *  ARCH_L2N and ARCH_L3N must be defined where the macro is expanded.
 *
 *  NOTE(review): the next and refcount members are presumably used for
 *  chaining unused tables (cf. next_free_l2/next_free_l3 in VPH64) and for
 *  counting users of a table; confirm against the dyntrans implementation.
 */
#define	DYNTRANS_MISC64_DECLARATIONS(arch,ARCH,tlbindextype)		\
	struct arch ## _l3_64_table {					\
		unsigned char	*host_load[1 << ARCH ## _L3N];		\
		unsigned char	*host_store[1 << ARCH ## _L3N];		\
		uint64_t	phys_addr[1 << ARCH ## _L3N];		\
		tlbindextype	vaddr_to_tlbindex[1 << ARCH ## _L3N];	\
		struct arch ## _tc_physpage *phys_page[1 << ARCH ## _L3N]; \
		struct arch ## _l3_64_table	*next;			\
		int		refcount;				\
	};								\
	struct arch ## _l2_64_table {					\
		struct arch ## _l3_64_table	*l3[1 << ARCH ## _L2N];	\
		struct arch ## _l2_64_table	*next;			\
		int				refcount;		\
	};
110 
111 
/*
 *  This structure contains a list of ranges within an emulated
 *  physical page that contain translatable code.
 *
 *  One list holds at most PHYSPAGE_RANGES_ENTRIES_PER_LIST ranges, stored
 *  in the three parallel arrays base, length and count. next_ofs links to a
 *  further list when more ranges are needed (see translation_ranges_ofs in
 *  the tc_physpage struct declared by DYNTRANS_MISC_DECLARATIONS).
 *
 *  NOTE(review): the units of base/length (bytes vs. instruction slots) and
 *  the exact meaning of count are not visible in this header; confirm
 *  against the code that fills in these lists.
 */
#define	PHYSPAGE_RANGES_ENTRIES_PER_LIST		20
struct physpage_ranges {
	uint32_t	next_ofs;	/*  0 for end of chain  */
	/*  Number of slots of the arrays below that are in use:  */
	uint32_t	n_entries_used;
	uint16_t	base[PHYSPAGE_RANGES_ENTRIES_PER_LIST];
	uint16_t	length[PHYSPAGE_RANGES_ENTRIES_PER_LIST];
	uint16_t	count[PHYSPAGE_RANGES_ENTRIES_PER_LIST];
};
124 
125 
/*
 *  Dyntrans "Instruction Translation Cache":
 *
 *  DYNTRANS_ITC(arch) expands to five member declarations (including the
 *  final semicolon), using the arch_tc_physpage and arch_instr_call structs
 *  declared by DYNTRANS_MISC_DECLARATIONS.
 *
 *  cur_physpage is a pointer to the current physpage. (It _HAPPENS_ to
 *  be the same as cur_ic_page, because all the instrcalls should be placed
 *  first in the physpage struct!)
 *
 *  cur_ic_page is a pointer to an array of xxx_IC_ENTRIES_PER_PAGE
 *  instruction call entries.
 *
 *  next_ic points to the next such instruction call to be executed.
 *
 *  physpage_template is a pointer to a physpage struct. (NOTE(review):
 *  presumably a pre-initialized page that new physpages are copied from;
 *  confirm against the dyntrans implementation.)
 *
 *  combination_check, when set to non-NULL, is executed automatically after
 *  an instruction has been translated. (It checks for combinations of
 *  instructions; low_addr is the offset of the translated instruction in the
 *  current page, NOT shifted right.)
 */
#define DYNTRANS_ITC(arch)	struct arch ## _tc_physpage *cur_physpage;  \
				struct arch ## _instr_call  *cur_ic_page;   \
				struct arch ## _instr_call  *next_ic;       \
				struct arch ## _tc_physpage *physpage_template;\
				void (*combination_check)(struct cpu *,     \
				    struct arch ## _instr_call *, int low_addr);
149 
/*
 *  Virtual -> physical -> host address translation TLB entries:
 *  ------------------------------------------------------------
 *
 *  Regardless of whether 32-bit or 64-bit address translation is used, the
 *  same TLB entry structure is used.
 *
 *  VPH_TLBS(arch,ARCH) expands to the declaration of an array named
 *  vph_tlb_entry, with ARCH_MAX_VPH_TLB_ENTRIES elements of the
 *  arch_vpg_tlb_entry struct declared by DYNTRANS_MISC_DECLARATIONS.
 *  ARCH_MAX_VPH_TLB_ENTRIES must be defined where the macro is expanded.
 */
#define	VPH_TLBS(arch,ARCH)						\
	struct arch ## _vpg_tlb_entry					\
	    vph_tlb_entry[ARCH ## _MAX_VPH_TLB_ENTRIES];
160 
/*
 *  32-bit dyntrans emulated Virtual -> physical -> host address translation:
 *  -------------------------------------------------------------------------
 *
 *  This stuff assumes that 4 KB pages are used. 20 bits to select a page
 *  means just 1 M entries needed. This is small enough that a couple of
 *  full-size tables can fit in virtual memory on modern hosts (both 32-bit
 *  and 64-bit hosts). :-)
 *
 *  Usage: e.g. VPH32(arm,ARM)
 *           or VPH32(sparc,SPARC)
 *
 *  Each macro expands to five array declarations with N_VPH32_ENTRIES
 *  elements each. Only the lower-case arch argument is used in the
 *  expansions (for the arch_tc_physpage struct name); the upper-case ARCH
 *  argument is accepted but not referenced.
 *
 *  The vph_tlb_entry entries are cpu dependent tlb entries.
 *
 *  The host_load and host_store entries point to host pages; the phys_addr
 *  entries are uint32_t (emulated physical addresses).
 *
 *  phys_page points to translation cache physpages.
 *
 *  vaddr_to_tlbindex is a virtual address to tlb index hint table.
 *  The values in this array are the tlb index plus 1, so a value of, say,
 *  3 means tlb index 2. A value of 0 would mean a tlb index of -1, which
 *  is not a valid index. (I.e. no hit.)
 *
 *  The VPH32_16BITVPHENTRIES variant is identical to VPH32, except that
 *  the vaddr_to_tlbindex entries are uint16_t instead of uint8_t, i.e.
 *  tlb index hints larger than 254 can be stored.
 *
 *  The VPH32EXTENDED variant adds an additional postfix to the array
 *  names. Used so far only for usermode addresses in M88K emulation.
 */
#define	N_VPH32_ENTRIES		1048576
#define	VPH32(arch,ARCH)						\
	unsigned char		*host_load[N_VPH32_ENTRIES];		\
	unsigned char		*host_store[N_VPH32_ENTRIES];		\
	uint32_t		phys_addr[N_VPH32_ENTRIES];		\
	struct arch ## _tc_physpage  *phys_page[N_VPH32_ENTRIES];	\
	uint8_t			vaddr_to_tlbindex[N_VPH32_ENTRIES];
#define	VPH32_16BITVPHENTRIES(arch,ARCH)				\
	unsigned char		*host_load[N_VPH32_ENTRIES];		\
	unsigned char		*host_store[N_VPH32_ENTRIES];		\
	uint32_t		phys_addr[N_VPH32_ENTRIES];		\
	struct arch ## _tc_physpage  *phys_page[N_VPH32_ENTRIES];	\
	uint16_t		vaddr_to_tlbindex[N_VPH32_ENTRIES];
#define	VPH32EXTENDED(arch,ARCH,ex)					\
	unsigned char		*host_load_ ## ex[N_VPH32_ENTRIES];	\
	unsigned char		*host_store_ ## ex[N_VPH32_ENTRIES];	\
	uint32_t		phys_addr_ ## ex[N_VPH32_ENTRIES];	\
	struct arch ## _tc_physpage  *phys_page_ ## ex[N_VPH32_ENTRIES];\
	uint8_t			vaddr_to_tlbindex_ ## ex[N_VPH32_ENTRIES];
207 
208 
/*
 *  64-bit dyntrans emulated Virtual -> physical -> host address translation:
 *  -------------------------------------------------------------------------
 *
 *  Usage: e.g. VPH64(alpha,ALPHA)
 *           or VPH64(sparc,SPARC)
 *
 *  The macro expands to five member declarations, using the arch_l2_64_table
 *  and arch_l3_64_table structs declared by DYNTRANS_MISC64_DECLARATIONS.
 *  The upper-case ARCH argument is accepted but not referenced.
 *
 *  l1_64 is an array containing pointers to l2 tables. It has
 *  (1 << DYNTRANS_L1N) entries.
 *
 *  l2_64_dummy is a pointer to a "dummy l2 table". Instead of having NULL
 *  pointers in l1_64 for unused slots, a pointer to the dummy table can be
 *  used.
 *
 *  l3_64_dummy is the corresponding pointer to an l3 table. (NOTE(review):
 *  presumably used the same way, for unused slots in l2 tables; confirm.)
 *
 *  NOTE(review): next_free_l2 and next_free_l3 presumably point to chains of
 *  currently unused tables, linked via the tables' next members; confirm
 *  against the dyntrans implementation.
 */
#define	DYNTRANS_L1N		17
#define	VPH64(arch,ARCH)						\
	struct arch ## _l3_64_table	*l3_64_dummy;			\
	struct arch ## _l3_64_table	*next_free_l3;			\
	struct arch ## _l2_64_table	*l2_64_dummy;			\
	struct arch ## _l2_64_table	*next_free_l2;			\
	struct arch ## _l2_64_table	*l1_64[1 << DYNTRANS_L1N];
229 
230 
231 /*  Include all CPUs' header files here:  */
232 #include "cpu_alpha.h"
233 #include "cpu_arm.h"
234 #include "cpu_m88k.h"
235 #include "cpu_mips.h"
236 #include "cpu_ppc.h"
237 #include "cpu_sh.h"
238 
/*
 *  Forward declarations, so that pointers to these structs can be used in
 *  the declarations that follow. struct cpu is defined in full further down
 *  in this file; the other structs are not defined in this header.
 */
struct cpu;
struct emul;
struct machine;
struct memory;
struct settings;
244 
245 
/*
 *  cpu_family
 *  ----------
 *
 *  This structure consists of various pointers to functions, performing
 *  architecture-specific functions.
 *
 *  Except for the next and arch fields at the top, all fields in the
 *  cpu_family struct are filled in by each CPU family's init function
 *  (see the CPU_FAMILY_INIT macro at the end of this file).
 */
struct cpu_family {
	/*  Pointer to another cpu_family. (NOTE(review): presumably the
	    next entry in a linked list of all families; confirm in cpu.c.)  */
	struct cpu_family	*next;

	/*  Architecture number. (NOTE(review): presumably the value
	    looked up by cpu_family_ptr_by_number(); confirm in cpu.c.)  */
	int			arch;

	/*  Family name, e.g. "MIPS", "Alpha" etc.  */
	char			*name;

	/*  Fill in architecture specific parts of a struct cpu.  */
	int			(*cpu_new)(struct cpu *cpu, struct memory *mem,
				    struct machine *machine,
				    int cpu_id, char *cpu_type_name);

	/*  Initialize various translation tables.  */
	void			(*init_tables)(struct cpu *cpu);

	/*  List available CPU types for this architecture.  */
	void			(*list_available_types)(void);

	/*  Disassemble an instruction.  */
	int			(*disassemble_instr)(struct cpu *cpu,
				    unsigned char *instr, int running,
				    uint64_t dumpaddr);

	/*  Dump CPU registers in readable format.  */
	void			(*register_dump)(struct cpu *cpu,
				    int gprs, int coprocs);

	/*  Dump generic CPU info in readable format.  */
	void			(*dumpinfo)(struct cpu *cpu);

	/*  Dump TLB data for CPU id x.  */
	void			(*tlbdump)(struct machine *m, int x,
				    int rawflag);

	/*  Print architecture-specific function call arguments.
	    (This is called for each function call, if running with -t.)  */
	void			(*functioncall_trace)(struct cpu *,
				    int n_args);
};
295 
296 
/*
 *  Dyntrans constants.
 *
 *  The translation cache starts with a base table: N_BASE_TABLE_ENTRIES
 *  uint32_t offsets into the cache itself, each one possibly leading to the
 *  translation cache struct(s) for a physical page.
 */

/*  Values for the delay_slot field of struct cpu:  */
#define	NOT_DELAYED			0
#define	DELAYED				1
#define	TO_BE_DELAYED			2
#define	EXCEPTION_IN_DELAY_SLOT		8

/*  N_SAFE_DYNTRANS_LIMIT is derived from the shift: 2^(shift-1) - 1.  */
#define	N_SAFE_DYNTRANS_LIMIT_SHIFT	14
#define	N_SAFE_DYNTRANS_LIMIT	((1 << (N_SAFE_DYNTRANS_LIMIT_SHIFT - 1)) - 1)

#define	MAX_DYNTRANS_READAHEAD		128

/*  Translation cache size (96 MB) and margin, in bytes:  */
#define	DEFAULT_DYNTRANS_CACHE_SIZE	(96 * 1024 * 1024)
#define	DYNTRANS_CACHE_MARGIN		200000

/*  The base table size is a power of two, so that a page number can be
    reduced to a table index with a simple mask:  */
#define	N_BASE_TABLE_ENTRIES		(1 << 16)
#define	PAGENR_TO_TABLE_INDEX(a)	((a) & (N_BASE_TABLE_ENTRIES - 1))
320 
321 
/*
 *  The generic CPU struct:
 *
 *  The first part is common to all emulated architectures; the cd union at
 *  the end holds the architecture-specific state.
 */

struct cpu {
	/*  Pointer back to the machine this CPU is in:  */
	struct machine	*machine;

	/*  Settings:  */
	struct settings *settings;

	/*  CPU-specific name, e.g. "R2000", "21164PC", etc.  */
	char		*name;

	/*  Full "path" to the CPU, e.g. "machine[0].cpu[0]":  */
	char		*path;

	/*  Nr of instructions executed, etc.:  */
	int64_t		ninstrs;
	int64_t		ninstrs_show;
	int64_t		ninstrs_flush;
	int64_t		ninstrs_since_gettimeofday;
	struct timeval	starttime;

	/*  EMUL_LITTLE_ENDIAN or EMUL_BIG_ENDIAN.  */
	uint8_t		byte_order;

	/*  0 for emulated 64-bit CPUs, 1 for 32-bit.  */
	uint8_t		is_32bit;

	/*  1 while running, 0 when paused/stopped.  */
	uint8_t		running;

	/*  See the "Meaning of delay_slot" constants (NOT_DELAYED,
	    DELAYED, etc.) defined before this struct.  */
	uint8_t		delay_slot;

	/*  0-based CPU id, in an emulated SMP system.  */
	int		cpu_id;

	/*  A pointer to the main memory connected to this CPU.  */
	struct memory	*mem;

	/*  A mask describing which bits of a virtual address are used.  */
	uint64_t	vaddr_mask;

	/*
	 *  Per-CPU function pointers. None of these are set anywhere in
	 *  this header. (NOTE(review): presumably filled in by the
	 *  architecture-specific cpu_new function; confirm in the
	 *  cpu_*.c files.)
	 */
	int		(*run_instr)(struct cpu *cpu);
	int		(*memory_rw)(struct cpu *cpu,
			    struct memory *mem, uint64_t vaddr,
			    unsigned char *data, size_t len,
			    int writeflag, int cache_flags);
	int		(*translate_v2p)(struct cpu *, uint64_t vaddr,
			    uint64_t *return_paddr, int flags);
	void		(*update_translation_table)(struct cpu *,
			    uint64_t vaddr_page, unsigned char *host_page,
			    int writeflag, uint64_t paddr_page);
	void		(*invalidate_translation_caches)(struct cpu *,
			    uint64_t paddr, int flags);
	void		(*invalidate_code_translation)(struct cpu *,
			    uint64_t paddr, int flags);
	void		(*useremul_syscall)(struct cpu *cpu, uint32_t code);
	int		(*instruction_has_delayslot)(struct cpu *cpu,
			    unsigned char *ib);

	/*  The program counter. (For 32-bit modes, not all bits are used.)  */
	uint64_t	pc;

	/*  The current depth of function call tracing.  */
	int		trace_tree_depth;

	/*
	 *  If is_halted is true when an interrupt trap occurs, the pointer
	 *  to the next instruction to execute will be the instruction
	 *  following the halt instruction, not the halt instruction itself.
	 *
	 *  If has_been_idling is true when printing the number of executed
	 *  instructions per second, "idling" is printed instead. (The number
	 *  of instrs per second when idling is meaningless anyway.)
	 */
	char		is_halted;
	char		has_been_idling;

	/*
	 *  Dynamic translation:
	 *
	 *  The number of translated instructions is assumed to be 1 per
	 *  instruction call. For each case where this differs from the
	 *  truth, n_translated_instrs should be modified. E.g. if 1000
	 *  instruction calls are done, and n_translated_instrs is 50, then
	 *  1050 emulated instructions were actually executed.
	 *
	 *  Note that it can also be adjusted negatively, that is, the way
	 *  to "get out" of a dyntrans loop is to set the current instruction
	 *  call pointer to the "nothing" instruction. This instruction
	 *  _decreases_ n_translated_instrs by 1. That way, once the dyntrans
	 *  loop exits, only real instructions will be counted, and not the
	 *  "nothing" instructions.
	 *
	 *  The translation cache is a relatively large chunk of memory (cf.
	 *  DEFAULT_DYNTRANS_CACHE_SIZE, which is 96 MB) which is used for
	 *  translations. When it has been used up, everything restarts from
	 *  scratch.
	 *
	 *  translation_readahead is non-zero when translating instructions
	 *  ahead of the current (emulated) instruction pointer.
	 */

	int		translation_readahead;

	/*  Instruction translation cache:  */
	int		n_translated_instrs;
	unsigned char	*translation_cache;
	size_t		translation_cache_cur_ofs;


	/*
	 *  CPU-family dependent:
	 *
	 *  These contain everything ranging from general purpose registers,
	 *  control registers, memory management, status words, interrupt
	 *  specifics, etc.
	 *
	 *  The member structs are defined in the cpu_*.h files included
	 *  earlier in this header. Since this is a union, only the member
	 *  matching the CPU's own architecture is meaningful.
	 */
	union {
		struct alpha_cpu      alpha;
		struct arm_cpu        arm;
		struct m88k_cpu       m88k;
		struct mips_cpu       mips;
		struct ppc_cpu        ppc;
		struct sh_cpu         sh;
	} cd;
};
451 
452 
/*
 *  cpu.c:
 *
 *  NOTE(review): the implementations are not visible from this header. The
 *  grouping remarks below are based on the prototypes and on the matching
 *  callbacks in struct cpu_family; confirm details in cpu.c.
 */

/*  Creation and destruction of a struct cpu:  */
struct cpu *cpu_new(struct memory *mem, struct machine *machine,
        int cpu_id, char *cpu_type_name);
void cpu_destroy(struct cpu *cpu);

/*  Counterparts of the tlbdump, register_dump and disassemble_instr
    callbacks in struct cpu_family:  */
void cpu_tlbdump(struct machine *m, int x, int rawflag);
void cpu_register_dump(struct machine *m, struct cpu *cpu,
	int gprs, int coprocs);
int cpu_disassemble_instr(struct machine *m, struct cpu *cpu,
	unsigned char *instr, int running, uint64_t addr);

/*  Function call tracing (cf. trace_tree_depth in struct cpu):  */
void cpu_functioncall_trace(struct cpu *cpu, uint64_t f);
void cpu_functioncall_trace_return(struct cpu *cpu);

/*  Translation cache (tc) setup:  */
void cpu_create_or_reset_tc(struct cpu *cpu);

void cpu_run_init(struct machine *machine);
void cpu_run_deinit(struct machine *machine);

/*  Counterparts of the dumpinfo and list_available_types callbacks in
    struct cpu_family, and instruction count statistics:  */
void cpu_dumpinfo(struct machine *m, struct cpu *cpu);
void cpu_list_available_types(void);
void cpu_show_cycles(struct machine *machine, int forced);

/*  CPU family lookup by architecture number, and one-time init:  */
struct cpu_family *cpu_family_ptr_by_number(int arch);
void cpu_init(void);
478 
479 
/*
 *  Invalidation flag bits. Each value is a distinct power of two, so the
 *  flags can be combined with bitwise OR.
 *
 *  NOTE(review): presumably these are the values passed as the flags
 *  argument of the invalidate_translation_caches and
 *  invalidate_code_translation callbacks in struct cpu; confirm against
 *  the callers.
 */
#define	JUST_MARK_AS_NON_WRITABLE	0x01
#define	INVALIDATE_ALL			0x02
#define	INVALIDATE_PADDR		0x04
#define	INVALIDATE_VADDR		0x08
#define	INVALIDATE_VADDR_UPPER4		0x10	/*  useful for PPC emulation  */
485 
486 
/*
 *  Helpers for adding CPU registers to a CPU's settings:
 *
 *  Each macro expands to a settings_add() call on cpu->settings, so a
 *  variable called cpu (a struct cpu pointer) must be in scope where the
 *  macro is used. name is passed on unchanged, and the address of var is
 *  passed as a void pointer. var must therefore be an lvalue; its type should
 *  match the SETTINGS_TYPE_UINTxx of the macro used.
 *
 *  The expansions already end with a semicolon. Writing MACRO(...); thus
 *  produces an additional empty statement, which is harmless in a plain
 *  statement list but breaks an unbraced if/else around the macro.
 *
 *  NOTE(review): the meaning of the third settings_add() argument (always
 *  1 here) is not visible from this header; see the settings_add()
 *  declaration.
 *
 *  Note: 64-bit processors running in 32-bit mode use a 32-bit
 *  display format, even though the underlying data is 64-bits.
 */
#define	CPU_SETTINGS_ADD_REGISTER64(name, var)				   \
	settings_add(cpu->settings, name, 1, SETTINGS_TYPE_UINT64,	   \
	    cpu->is_32bit? SETTINGS_FORMAT_HEX32 : SETTINGS_FORMAT_HEX64,  \
	    (void *) &(var));
#define	CPU_SETTINGS_ADD_REGISTER32(name, var)				   \
	settings_add(cpu->settings, name, 1, SETTINGS_TYPE_UINT32,	   \
	    SETTINGS_FORMAT_HEX32, (void *) &(var));
#define	CPU_SETTINGS_ADD_REGISTER16(name, var)				   \
	settings_add(cpu->settings, name, 1, SETTINGS_TYPE_UINT16,	   \
	    SETTINGS_FORMAT_HEX16, (void *) &(var));
#define	CPU_SETTINGS_ADD_REGISTER8(name, var)				   \
	settings_add(cpu->settings, name, 1, SETTINGS_TYPE_UINT8,	   \
	    SETTINGS_FORMAT_HEX8, (void *) &(var));
502 
503 
/*
 *  CPU_FAMILY_INIT(n,s) defines the function n_cpu_family_init(), which
 *  fills in a struct cpu_family for one architecture:
 *
 *	n	prefix of the architecture's functions. n_cpu_new,
 *		n_cpu_list_available_types, n_cpu_disassemble_instr,
 *		n_cpu_register_dump, n_cpu_dumpinfo,
 *		n_cpu_functioncall_trace, n_cpu_tlbdump and
 *		n_cpu_init_tables must be declared where the macro is
 *		expanded.
 *	s	the family name; a copy allocated with strdup() is stored
 *		in fp->name. (strdup() requires <string.h> at the point
 *		of expansion.)
 *
 *  The next and arch fields of the struct are not touched.
 *
 *  The generated function returns 1 on success. If the name could not be
 *  duplicated (out of memory), fp->name is NULL and 0 is returned, instead
 *  of reporting success for a family without a name. The function pointers
 *  are filled in in both cases.
 *
 *  NOTE(review): this assumes that callers treat a return value of 0 as
 *  "family could not be initialized"; confirm in cpu.c.
 */
#define CPU_FAMILY_INIT(n,s)	int n ## _cpu_family_init(		\
	struct cpu_family *fp) {					\
	/*  Fill in the cpu_family struct with valid data for this arch.  */ \
	fp->name = strdup(s);						\
	fp->cpu_new = n ## _cpu_new;					\
	fp->list_available_types = n ## _cpu_list_available_types;	\
	fp->disassemble_instr = n ## _cpu_disassemble_instr;		\
	fp->register_dump = n ## _cpu_register_dump;			\
	fp->dumpinfo = n ## _cpu_dumpinfo;				\
	fp->functioncall_trace = n ## _cpu_functioncall_trace;		\
	fp->tlbdump = n ## _cpu_tlbdump;				\
	fp->init_tables = n ## _cpu_init_tables;			\
	/*  strdup() returns NULL if memory could not be allocated:  */	\
	return fp->name != NULL;					\
	}
518 
519 
520 #endif	/*  CPU_H  */
521